From 8ee225744f109b19e7d2412cbc50d4586991d8cf Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Fri, 10 Jul 2020 21:14:06 -0700 Subject: [PATCH 001/771] [lldb/Test] Fix missing yaml2obj in Xcode standalone build. Rather than trying to find the yaml2obj from dotest we should pass it in like we do for dsymutil and FileCheck. --- .../Python/lldbsuite/test/configuration.py | 10 ++++++++++ lldb/packages/Python/lldbsuite/test/dotest.py | 12 +++++++---- .../Python/lldbsuite/test/dotest_args.py | 2 +- .../Python/lldbsuite/test/lldbtest.py | 20 ++++--------------- lldb/test/API/CMakeLists.txt | 4 ++++ lldb/test/API/lit.cfg.py | 3 +++ lldb/test/API/lit.site.cfg.py.in | 1 + lldb/utils/lldb-dotest/CMakeLists.txt | 5 +++++ lldb/utils/lldb-dotest/lldb-dotest.in | 2 ++ 9 files changed, 38 insertions(+), 21 deletions(-) diff --git a/lldb/packages/Python/lldbsuite/test/configuration.py b/lldb/packages/Python/lldbsuite/test/configuration.py index f05152253c75f..ca27864463002 100644 --- a/lldb/packages/Python/lldbsuite/test/configuration.py +++ b/lldb/packages/Python/lldbsuite/test/configuration.py @@ -57,6 +57,9 @@ # Path to the FileCheck testing tool. Not optional. filecheck = None +# Path to the yaml2obj tool. Not optional. +yaml2obj = None + # The arch might dictate some specific CFLAGS to be passed to the toolchain to build # the inferior programs. The global variable cflags_extras provides a hook to do # just that. @@ -163,6 +166,13 @@ def get_filecheck_path(): if filecheck and os.path.lexists(filecheck): return filecheck +def get_yaml2obj_path(): + """ + Get the path to the yaml2obj tool. + """ + if yaml2obj and os.path.lexists(yaml2obj): + return yaml2obj + def is_reproducer_replay(): """ Returns true when dotest is being replayed from a reproducer. 
Never use diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py index f9975b27c4759..8238168d0fb6d 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest.py +++ b/lldb/packages/Python/lldbsuite/test/dotest.py @@ -272,13 +272,17 @@ def parseOptionsAndInitTestdirs(): configuration.dsymutil = seven.get_command_output( 'xcrun -find -toolchain default dsymutil') + + # The lldb-dotest script produced by the CMake build passes in a path to a + # working FileCheck and yaml2obj binary. So does one specific Xcode + # project target. However, when invoking dotest.py directly, a valid + # --filecheck and --yaml2obj option needs to be given. if args.filecheck: - # The lldb-dotest script produced by the CMake build passes in a path - # to a working FileCheck binary. So does one specific Xcode project - # target. However, when invoking dotest.py directly, a valid --filecheck - # option needs to be given. configuration.filecheck = os.path.abspath(args.filecheck) + if args.yaml2obj: + configuration.yaml2obj = os.path.abspath(args.yaml2obj) + if not configuration.get_filecheck_path(): logging.warning('No valid FileCheck executable; some tests may fail...') logging.warning('(Double-check the --filecheck argument to dotest.py)') diff --git a/lldb/packages/Python/lldbsuite/test/dotest_args.py b/lldb/packages/Python/lldbsuite/test/dotest_args.py index ff2ac5a47ea50..d6f59efdf28b4 100644 --- a/lldb/packages/Python/lldbsuite/test/dotest_args.py +++ b/lldb/packages/Python/lldbsuite/test/dotest_args.py @@ -51,7 +51,7 @@ def create_parser(): suggestions: do not lump the "-A arch1 -A arch2" together such that the -E option applies to only one of the architectures''')) group.add_argument('--dsymutil', metavar='dsymutil', dest='dsymutil', help=textwrap.dedent('Specify which dsymutil to use.')) - + group.add_argument('--yaml2obj', metavar='yaml2obj', dest='yaml2obj', help=textwrap.dedent('Specify which yaml2obj binary to use.')) 
group.add_argument('--filecheck', metavar='filecheck', dest='filecheck', help=textwrap.dedent('Specify which FileCheck binary to use.')) # Test filtering options diff --git a/lldb/packages/Python/lldbsuite/test/lldbtest.py b/lldb/packages/Python/lldbsuite/test/lldbtest.py index ddb79de0ab32f..29561d4794beb 100644 --- a/lldb/packages/Python/lldbsuite/test/lldbtest.py +++ b/lldb/packages/Python/lldbsuite/test/lldbtest.py @@ -1633,20 +1633,6 @@ def findBuiltClang(self): return os.environ["CC"] - def findYaml2obj(self): - """ - Get the path to the yaml2obj executable, which can be used to create - test object files from easy to write yaml instructions. - - Throws an Exception if the executable cannot be found. - """ - # Tries to find yaml2obj at the same folder as clang - clang_dir = os.path.dirname(self.findBuiltClang()) - path = distutils.spawn.find_executable("yaml2obj", clang_dir) - if path is not None: - return path - raise Exception("yaml2obj executable not found") - def yaml2obj(self, yaml_path, obj_path): """ @@ -1654,8 +1640,10 @@ def yaml2obj(self, yaml_path, obj_path): Throws subprocess.CalledProcessError if the object could not be created. """ - yaml2obj = self.findYaml2obj() - command = [yaml2obj, "-o=%s" % obj_path, yaml_path] + yaml2obj_bin = configuration.get_yaml2obj_path() + if not yaml2obj_bin: + self.assertTrue(False, "No valid FileCheck executable specified") + command = [yaml2obj_bin, "-o=%s" % obj_path, yaml_path] system([command]) def getBuildFlags( diff --git a/lldb/test/API/CMakeLists.txt b/lldb/test/API/CMakeLists.txt index 9aad9fc750cae..34f3522c8dfec 100644 --- a/lldb/test/API/CMakeLists.txt +++ b/lldb/test/API/CMakeLists.txt @@ -49,6 +49,7 @@ set(LLDB_DEFAULT_TEST_EXECUTABLE "${LLVM_RUNTIME_OUTPUT_INTDIR}/lldb${CMAKE_EXEC # Set the paths to default llvm tools. 
set(LLDB_DEFAULT_TEST_DSYMUTIL "${LLVM_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/dsymutil${CMAKE_EXECUTABLE_SUFFIX}") set(LLDB_DEFAULT_TEST_FILECHECK "${LLVM_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/FileCheck${CMAKE_EXECUTABLE_SUFFIX}") +set(LLDB_DEFAULT_TEST_YAML2OBJ "${LLVM_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/yaml2obj${CMAKE_EXECUTABLE_SUFFIX}") if (TARGET clang) set(LLDB_DEFAULT_TEST_COMPILER "${LLVM_BINARY_DIR}/${CMAKE_CFG_INTDIR}/bin/clang${CMAKE_EXECUTABLE_SUFFIX}") @@ -60,6 +61,7 @@ set(LLDB_TEST_EXECUTABLE "${LLDB_DEFAULT_TEST_EXECUTABLE}" CACHE PATH "lldb exec set(LLDB_TEST_COMPILER "${LLDB_DEFAULT_TEST_COMPILER}" CACHE PATH "C Compiler to use for building LLDB test inferiors") set(LLDB_TEST_DSYMUTIL "${LLDB_DEFAULT_TEST_DSYMUTIL}" CACHE PATH "dsymutil used for generating dSYM bundles") set(LLDB_TEST_FILECHECK "${LLDB_DEFAULT_TEST_FILECHECK}" CACHE PATH "FileCheck used for testing purposes") +set(LLDB_TEST_YAML2OBJ "${LLDB_DEFAULT_TEST_YAML2OBJ}" CACHE PATH "yaml2obj used for testing purposes") if ("${LLDB_TEST_COMPILER}" STREQUAL "") message(FATAL_ERROR "LLDB test compiler not specified. Tests will not run.") @@ -145,6 +147,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_COMPILER "${LLDB_TEST_COMPILER}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_DSYMUTIL "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_FILECHECK "${LLDB_TEST_FILECHECK}") + string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_YAML2OBJ "${LLDB_TEST_YAML2OBJ}") # Remaining ones must be paths to the provided LLVM build-tree. 
if(LLVM_CONFIGURATION_TYPES) @@ -172,6 +175,7 @@ string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_EXECUTAB string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_COMPILER "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_DSYMUTIL "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_FILECHECK "${LLDB_TEST_FILECHECK}") +string(REPLACE ${CMAKE_CFG_INTDIR} ${dotest_args_replacement} LLDB_TEST_YAML2OBJ "${LLDB_TEST_YAML2OBJ}") # Configure the API test suite. configure_lit_site_cfg( diff --git a/lldb/test/API/lit.cfg.py b/lldb/test/API/lit.cfg.py index 632d883e0da9a..57e7509a9f7e5 100644 --- a/lldb/test/API/lit.cfg.py +++ b/lldb/test/API/lit.cfg.py @@ -182,6 +182,9 @@ def find_python_interpreter(): if config.filecheck: dotest_cmd += ['--filecheck', config.filecheck] +if config.yaml2obj: + dotest_cmd += ['--yaml2obj', config.yaml2obj] + if config.lldb_libs_dir: dotest_cmd += ['--lldb-libs-dir', config.lldb_libs_dir] diff --git a/lldb/test/API/lit.site.cfg.py.in b/lldb/test/API/lit.site.cfg.py.in index e97f867b265b8..866dc1675e7cf 100644 --- a/lldb/test/API/lit.site.cfg.py.in +++ b/lldb/test/API/lit.site.cfg.py.in @@ -30,6 +30,7 @@ config.test_arch = '@LLDB_TEST_ARCH@' config.test_compiler = '@LLDB_TEST_COMPILER@' config.dsymutil = '@LLDB_TEST_DSYMUTIL@' config.filecheck = '@LLDB_TEST_FILECHECK@' +config.yaml2obj = '@LLDB_TEST_YAML2OBJ@' # The API tests use their own module caches. 
config.lldb_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_LLDB@", "lldb-api") config.clang_module_cache = os.path.join("@LLDB_TEST_MODULE_CACHE_CLANG@", "lldb-api") diff --git a/lldb/utils/lldb-dotest/CMakeLists.txt b/lldb/utils/lldb-dotest/CMakeLists.txt index 0278c370f7fe1..0ef60c1427610 100644 --- a/lldb/utils/lldb-dotest/CMakeLists.txt +++ b/lldb/utils/lldb-dotest/CMakeLists.txt @@ -26,6 +26,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") + string(REPLACE ${LLVM_RUNTIME_OUTPUT_INTDIR} ${config_runtime_output_dir} LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") # Remaining ones must be paths to the provided LLVM build-tree. if(${config_type} IN_LIST LLVM_CONFIGURATION_TYPES) @@ -37,6 +38,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") + string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") string(REPLACE ${CMAKE_CFG_INTDIR} ${config_type} LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") else() # Single-configuration generator like Ninja. @@ -47,6 +49,7 @@ if(LLDB_BUILT_STANDALONE) string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} "." 
LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") + string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ_CONFIGURED}") string(REPLACE ${CMAKE_CFG_INTDIR} "." LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") endif() @@ -65,6 +68,7 @@ elseif(NOT "${CMAKE_CFG_INTDIR}" STREQUAL ".") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") + string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") string(REPLACE ${CMAKE_CFG_INTDIR} ${LLVM_BUILD_MODE} LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") configure_file( @@ -80,6 +84,7 @@ else() set(LLDB_TEST_COMPILER_CONFIGURED "${LLDB_TEST_COMPILER}") set(LLDB_TEST_DSYMUTIL_CONFIGURED "${LLDB_TEST_DSYMUTIL}") set(LLDB_TEST_FILECHECK_CONFIGURED "${LLDB_TEST_FILECHECK}") + set(LLDB_TEST_YAML2OBJ_CONFIGURED "${LLDB_TEST_YAML2OBJ}") set(LLDB_LIBS_DIR_CONFIGURED "${LLDB_LIBS_DIR}") configure_file( diff --git a/lldb/utils/lldb-dotest/lldb-dotest.in b/lldb/utils/lldb-dotest/lldb-dotest.in index 36d5fd38cc6d5..ee0ea6dff748c 100755 --- a/lldb/utils/lldb-dotest/lldb-dotest.in +++ b/lldb/utils/lldb-dotest/lldb-dotest.in @@ -10,6 +10,7 @@ executable = '@LLDB_TEST_EXECUTABLE_CONFIGURED@' compiler = '@LLDB_TEST_COMPILER_CONFIGURED@' dsymutil = '@LLDB_TEST_DSYMUTIL_CONFIGURED@' filecheck = '@LLDB_TEST_FILECHECK_CONFIGURED@' +yaml2obj = '@LLDB_TEST_YAML2OBJ_CONFIGURED@' lldb_libs_dir = "@LLDB_LIBS_DIR_CONFIGURED@" lldb_build_intel_pt = "@LLDB_BUILD_INTEL_PT@" @@ -24,6 +25,7 @@ if __name__ == '__main__': cmd.extend(['--executable', executable]) cmd.extend(['--compiler', compiler]) cmd.extend(['--dsymutil', dsymutil]) + cmd.extend(['--yaml2obj', yaml2obj]) cmd.extend(['--filecheck', 
filecheck]) cmd.extend(['--lldb-libs-dir', lldb_libs_dir]) if lldb_build_intel_pt == "1": From b5667d00e0447747419a783697b84a37f59ce055 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Fri, 10 Jul 2020 16:45:02 -0500 Subject: [PATCH 002/771] [OpenMP][CUDA] Fix std::complex in GPU regions The old way worked to some degree for C++-mode but in C mode we actually tried to introduce variants of macros (e.g., isinf). To make both modes work reliably we get rid of those extra variants and directly use NVIDIA intrinsics in the complex implementation. While this has to be revisited as we add other GPU targets which want to reuse the code, it should be fine for now. Reviewed By: tra, JonChesterfield, yaxunl Differential Revision: https://reviews.llvm.org/D83591 --- .../Headers/__clang_cuda_complex_builtins.h | 52 +++++++++---------- clang/lib/Headers/__clang_cuda_math.h | 10 ---- .../test/Headers/nvptx_device_math_complex.c | 31 +++++++++-- .../Headers/nvptx_device_math_complex.cpp | 31 +++++++++-- 4 files changed, 76 insertions(+), 48 deletions(-) diff --git a/clang/lib/Headers/__clang_cuda_complex_builtins.h b/clang/lib/Headers/__clang_cuda_complex_builtins.h index c48c754ed1a4b..8c10ff6b461fd 100644 --- a/clang/lib/Headers/__clang_cuda_complex_builtins.h +++ b/clang/lib/Headers/__clang_cuda_complex_builtins.h @@ -23,20 +23,16 @@ #define __DEVICE__ __device__ inline #endif -// Make the algorithms available for C and C++ by selecting the right functions. -#if defined(__cplusplus) -// TODO: In OpenMP mode we cannot overload isinf/isnan/isfinite the way we -// overload all other math functions because old math system headers and not -// always conformant and return an integer instead of a boolean. Until that has -// been addressed we need to work around it. For now, we substituate with the -// calls we would have used to implement those three functions. Note that we -// could use the C alternatives as well. 
-#define _ISNANd ::__isnan -#define _ISNANf ::__isnanf -#define _ISINFd ::__isinf -#define _ISINFf ::__isinff -#define _ISFINITEd ::__isfinited -#define _ISFINITEf ::__finitef +// To make the algorithms available for C and C++ in CUDA and OpenMP we select +// different but equivalent function versions. TODO: For OpenMP we currently +// select the native builtins as the overload support for templates is lacking. +#if !defined(_OPENMP) +#define _ISNANd std::isnan +#define _ISNANf std::isnan +#define _ISINFd std::isinf +#define _ISINFf std::isinf +#define _ISFINITEd std::isfinite +#define _ISFINITEf std::isfinite #define _COPYSIGNd std::copysign #define _COPYSIGNf std::copysign #define _SCALBNd std::scalbn @@ -46,20 +42,20 @@ #define _LOGBd std::logb #define _LOGBf std::logb #else -#define _ISNANd isnan -#define _ISNANf isnanf -#define _ISINFd isinf -#define _ISINFf isinff -#define _ISFINITEd isfinite -#define _ISFINITEf isfinitef -#define _COPYSIGNd copysign -#define _COPYSIGNf copysignf -#define _SCALBNd scalbn -#define _SCALBNf scalbnf -#define _ABSd abs -#define _ABSf absf -#define _LOGBd logb -#define _LOGBf logbf +#define _ISNANd __nv_isnand +#define _ISNANf __nv_isnanf +#define _ISINFd __nv_isinfd +#define _ISINFf __nv_isinff +#define _ISFINITEd __nv_isfinited +#define _ISFINITEf __nv_finitef +#define _COPYSIGNd __nv_copysign +#define _COPYSIGNf __nv_copysignf +#define _SCALBNd __nv_scalbn +#define _SCALBNf __nv_scalbnf +#define _ABSd __nv_fabs +#define _ABSf __nv_fabsf +#define _LOGBd __nv_logb +#define _LOGBf __nv_logbf #endif #if defined(__cplusplus) diff --git a/clang/lib/Headers/__clang_cuda_math.h b/clang/lib/Headers/__clang_cuda_math.h index 2e8e6ae71d9cf..332e616702acf 100644 --- a/clang/lib/Headers/__clang_cuda_math.h +++ b/clang/lib/Headers/__clang_cuda_math.h @@ -340,16 +340,6 @@ __DEVICE__ float y1f(float __a) { return __nv_y1f(__a); } __DEVICE__ double yn(int __a, double __b) { return __nv_yn(__a, __b); } __DEVICE__ float ynf(int __a, float __b) { 
return __nv_ynf(__a, __b); } -// In C++ mode OpenMP takes the system versions of these because some math -// headers provide the wrong return type. This cannot happen in C and we can and -// want to use the specialized versions right away. -#if defined(_OPENMP) && !defined(__cplusplus) -__DEVICE__ int isinff(float __x) { return __nv_isinff(__x); } -__DEVICE__ int isinf(double __x) { return __nv_isinfd(__x); } -__DEVICE__ int isnanf(float __x) { return __nv_isnanf(__x); } -__DEVICE__ int isnan(double __x) { return __nv_isnand(__x); } -#endif - #pragma pop_macro("__DEVICE__") #pragma pop_macro("__DEVICE_VOID__") #pragma pop_macro("__FAST_OR_SLOW") diff --git a/clang/test/Headers/nvptx_device_math_complex.c b/clang/test/Headers/nvptx_device_math_complex.c index 0e212592dd2ba..6e3e8bffbd24e 100644 --- a/clang/test/Headers/nvptx_device_math_complex.c +++ b/clang/test/Headers/nvptx_device_math_complex.c @@ -11,12 +11,34 @@ #include #endif -// CHECK-DAG: define weak {{.*}} @__mulsc3 -// CHECK-DAG: define weak {{.*}} @__muldc3 -// CHECK-DAG: define weak {{.*}} @__divsc3 -// CHECK-DAG: define weak {{.*}} @__divdc3 +// CHECK: define weak {{.*}} @__muldc3 +// CHECK-DAG: call i32 @__nv_isnand( +// CHECK-DAG: call i32 @__nv_isinfd( +// CHECK-DAG: call double @__nv_copysign( +// CHECK: define weak {{.*}} @__mulsc3 +// CHECK-DAG: call i32 @__nv_isnanf( +// CHECK-DAG: call i32 @__nv_isinff( +// CHECK-DAG: call float @__nv_copysignf( + +// CHECK: define weak {{.*}} @__divdc3 +// CHECK-DAG: call i32 @__nv_isnand( +// CHECK-DAG: call i32 @__nv_isinfd( +// CHECK-DAG: call i32 @__nv_isfinited( +// CHECK-DAG: call double @__nv_copysign( +// CHECK-DAG: call double @__nv_scalbn( +// CHECK-DAG: call double @__nv_fabs( +// CHECK-DAG: call double @__nv_logb( + +// CHECK: define weak {{.*}} @__divsc3 +// CHECK-DAG: call i32 @__nv_isnanf( +// CHECK-DAG: call i32 @__nv_isinff( +// CHECK-DAG: call i32 @__nv_finitef( +// CHECK-DAG: call float @__nv_copysignf( // CHECK-DAG: call float 
@__nv_scalbnf( +// CHECK-DAG: call float @__nv_fabsf( +// CHECK-DAG: call float @__nv_logbf( + void test_scmplx(float _Complex a) { #pragma omp target { @@ -24,7 +46,6 @@ void test_scmplx(float _Complex a) { } } -// CHECK-DAG: call double @__nv_scalbn( void test_dcmplx(double _Complex a) { #pragma omp target { diff --git a/clang/test/Headers/nvptx_device_math_complex.cpp b/clang/test/Headers/nvptx_device_math_complex.cpp index 58ed24b74b0e4..e4b78deb05d7b 100644 --- a/clang/test/Headers/nvptx_device_math_complex.cpp +++ b/clang/test/Headers/nvptx_device_math_complex.cpp @@ -5,12 +5,34 @@ #include -// CHECK-DAG: define weak {{.*}} @__mulsc3 -// CHECK-DAG: define weak {{.*}} @__muldc3 -// CHECK-DAG: define weak {{.*}} @__divsc3 -// CHECK-DAG: define weak {{.*}} @__divdc3 +// CHECK: define weak {{.*}} @__muldc3 +// CHECK-DAG: call i32 @__nv_isnand( +// CHECK-DAG: call i32 @__nv_isinfd( +// CHECK-DAG: call double @__nv_copysign( +// CHECK: define weak {{.*}} @__mulsc3 +// CHECK-DAG: call i32 @__nv_isnanf( +// CHECK-DAG: call i32 @__nv_isinff( +// CHECK-DAG: call float @__nv_copysignf( + +// CHECK: define weak {{.*}} @__divdc3 +// CHECK-DAG: call i32 @__nv_isnand( +// CHECK-DAG: call i32 @__nv_isinfd( +// CHECK-DAG: call i32 @__nv_isfinited( +// CHECK-DAG: call double @__nv_copysign( +// CHECK-DAG: call double @__nv_scalbn( +// CHECK-DAG: call double @__nv_fabs( +// CHECK-DAG: call double @__nv_logb( + +// CHECK: define weak {{.*}} @__divsc3 +// CHECK-DAG: call i32 @__nv_isnanf( +// CHECK-DAG: call i32 @__nv_isinff( +// CHECK-DAG: call i32 @__nv_finitef( +// CHECK-DAG: call float @__nv_copysignf( // CHECK-DAG: call float @__nv_scalbnf( +// CHECK-DAG: call float @__nv_fabsf( +// CHECK-DAG: call float @__nv_logbf( + void test_scmplx(std::complex a) { #pragma omp target { @@ -18,7 +40,6 @@ void test_scmplx(std::complex a) { } } -// CHECK-DAG: call double @__nv_scalbn( void test_dcmplx(std::complex a) { #pragma omp target { From 256e4d46a67517056d1e45d71c02424db01eff44 Mon 
Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 10 Jul 2020 22:52:04 -0700 Subject: [PATCH 003/771] Fix signed vs unsigned comparison warnings a different way. --- .../MachONormalizedFileBinaryReaderTests.cpp | 20 ++++++++-------- .../MachONormalizedFileYAMLTests.cpp | 24 +++++++++---------- 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp index aad5f8afcfdc3..fbf18a8d9e007 100644 --- a/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp +++ b/lld/unittests/MachOTests/MachONormalizedFileBinaryReaderTests.cpp @@ -75,7 +75,7 @@ TEST(BinaryReaderTest, empty_obj_x86_64) { fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -106,7 +106,7 @@ TEST(BinaryReaderTest, empty_obj_x86) { fromBinary(fileBytes, sizeof(fileBytes), "i386"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -137,7 +137,7 @@ TEST(BinaryReaderTest, empty_obj_ppc) { fromBinary(fileBytes, sizeof(fileBytes), "ppc"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); 
EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -168,7 +168,7 @@ TEST(BinaryReaderTest, empty_obj_armv7) { fromBinary(fileBytes, sizeof(fileBytes), "armv7"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -182,7 +182,7 @@ TEST(BinaryReaderTest, empty_obj_x86_64_arm7) { fromBinary(fileBytes, sizeof(fileBytes), "x86_64"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); EXPECT_TRUE(f->undefinedSymbols.empty()); @@ -191,7 +191,7 @@ TEST(BinaryReaderTest, empty_obj_x86_64_arm7) { fromBinary(fileBytes, sizeof(fileBytes), "armv7"); EXPECT_EQ(f2->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ((int)(f2->fileType), MH_OBJECT); - EXPECT_EQ((int)(f2->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f2->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f2->localSymbols.empty()); EXPECT_TRUE(f2->globalSymbols.empty()); EXPECT_TRUE(f2->undefinedSymbols.empty()); @@ -268,7 +268,7 @@ TEST(BinaryReaderTest, hello_obj_x86_64) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); @@ -393,7 +393,7 @@ TEST(BinaryReaderTest, hello_obj_x86) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - 
EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); @@ -525,7 +525,7 @@ TEST(BinaryReaderTest, hello_obj_armv7) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); @@ -669,7 +669,7 @@ TEST(BinaryReaderTest, hello_obj_ppc) { EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); EXPECT_EQ((int)(f->fileType), MH_OBJECT); - EXPECT_EQ((int)(f->flags), MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& text = f->sections[0]; EXPECT_TRUE(text.segmentName.equals("__TEXT")); diff --git a/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp b/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp index 6ceb197b4b84a..dbfe3a051811a 100644 --- a/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp +++ b/lld/unittests/MachOTests/MachONormalizedFileYAMLTests.cpp @@ -50,7 +50,7 @@ TEST(ObjectFileYAML, empty_ppc) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_ppc); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)(int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -66,7 +66,7 @@ TEST(ObjectFileYAML, empty_x86_64) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), 
llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)(int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -82,7 +82,7 @@ TEST(ObjectFileYAML, empty_x86) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -98,7 +98,7 @@ TEST(ObjectFileYAML, empty_armv6) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv6); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -114,7 +114,7 @@ TEST(ObjectFileYAML, empty_armv7) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -130,7 +130,7 @@ TEST(ObjectFileYAML, empty_armv7s) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7s); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f->sections.empty()); EXPECT_TRUE(f->localSymbols.empty()); EXPECT_TRUE(f->globalSymbols.empty()); @@ -143,7 +143,7 @@ TEST(ObjectFileYAML, roundTrip) 
{ NormalizedFile f; f.arch = lld::MachOLinkingContext::arch_x86_64; f.fileType = llvm::MachO::MH_OBJECT; - f.flags = llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; + f.flags = (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS; f.os = lld::MachOLinkingContext::OS::macOSX; toYAML(f, intermediate); } @@ -151,7 +151,7 @@ TEST(ObjectFileYAML, roundTrip) { std::unique_ptr f2 = fromYAML(intermediate); EXPECT_EQ(f2->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ((int)(f2->fileType), llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f2->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f2->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_TRUE(f2->sections.empty()); EXPECT_TRUE(f2->localSymbols.empty()); EXPECT_TRUE(f2->globalSymbols.empty()); @@ -275,7 +275,7 @@ TEST(ObjectFileYAML, hello_x86_64) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86_64); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; @@ -405,7 +405,7 @@ TEST(ObjectFileYAML, hello_x86) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_x86); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; @@ -533,7 +533,7 @@ TEST(ObjectFileYAML, hello_armv6) { "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv6); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; @@ -673,7 +673,7 @@ TEST(ObjectFileYAML, hello_armv7) 
{ "...\n"); EXPECT_EQ(f->arch, lld::MachOLinkingContext::arch_armv7); EXPECT_EQ(f->fileType, llvm::MachO::MH_OBJECT); - EXPECT_EQ((int)(f->flags), llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); + EXPECT_EQ((int)(f->flags), (int)llvm::MachO::MH_SUBSECTIONS_VIA_SYMBOLS); EXPECT_EQ(f->sections.size(), 2UL); const Section& sect1 = f->sections[0]; From c98699582a6333bbe76ff7853b4cd6beb45754cf Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 6 Jul 2020 19:13:37 -0500 Subject: [PATCH 004/771] [OpenMP][NFC] Remove unused (always fixed) arguments There are various runtime calls in the device runtime with unused, or always fixed, arguments. This is bad for all sorts of reasons. Clean up two before as we match them in OpenMPOpt now. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D83268 --- clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp | 20 ++++++++----------- clang/test/OpenMP/nvptx_data_sharing.cpp | 4 ++-- clang/test/OpenMP/nvptx_parallel_codegen.cpp | 10 +++++----- clang/test/OpenMP/nvptx_target_codegen.cpp | 2 +- .../OpenMP/nvptx_target_teams_codegen.cpp | 4 ++-- .../nvptx_target_teams_distribute_codegen.cpp | 2 +- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 5 +++++ .../deviceRTLs/common/src/parallel.cu | 9 ++------- openmp/libomptarget/deviceRTLs/interface.h | 6 ++---- 9 files changed, 28 insertions(+), 34 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp index cabd06bd76e84..cbd443134e7a8 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeNVPTX.cpp @@ -38,11 +38,9 @@ enum OpenMPRTLFunctionNVPTX { /// Call to void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); OMPRTL_NVPTX__kmpc_spmd_kernel_deinit_v2, /// Call to void __kmpc_kernel_prepare_parallel(void - /// *outlined_function, int16_t - /// IsOMPRuntimeInitialized); + /// *outlined_function); OMPRTL_NVPTX__kmpc_kernel_prepare_parallel, - /// Call to bool 
__kmpc_kernel_parallel(void **outlined_function, - /// int16_t IsOMPRuntimeInitialized); + /// Call to bool __kmpc_kernel_parallel(void **outlined_function); OMPRTL_NVPTX__kmpc_kernel_parallel, /// Call to void __kmpc_kernel_end_parallel(); OMPRTL_NVPTX__kmpc_kernel_end_parallel, @@ -1466,8 +1464,7 @@ void CGOpenMPRuntimeNVPTX::emitWorkerLoop(CodeGenFunction &CGF, CGF.InitTempAlloca(WorkFn, llvm::Constant::getNullValue(CGF.Int8PtrTy)); // TODO: Optimize runtime initialization and pass in correct value. - llvm::Value *Args[] = {WorkFn.getPointer(), - /*RequiresOMPRuntime=*/Bld.getInt16(1)}; + llvm::Value *Args[] = {WorkFn.getPointer()}; llvm::Value *Ret = CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_parallel), Args); Bld.CreateStore(Bld.CreateZExt(Ret, CGF.Int8Ty), ExecStatus); @@ -1595,17 +1592,16 @@ CGOpenMPRuntimeNVPTX::createNVPTXRuntimeFunction(unsigned Function) { } case OMPRTL_NVPTX__kmpc_kernel_prepare_parallel: { /// Build void __kmpc_kernel_prepare_parallel( - /// void *outlined_function, int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int8PtrTy, CGM.Int16Ty}; + /// void *outlined_function); + llvm::Type *TypeParams[] = {CGM.Int8PtrTy}; auto *FnTy = llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false); RTLFn = CGM.CreateRuntimeFunction(FnTy, "__kmpc_kernel_prepare_parallel"); break; } case OMPRTL_NVPTX__kmpc_kernel_parallel: { - /// Build bool __kmpc_kernel_parallel(void **outlined_function, - /// int16_t IsOMPRuntimeInitialized); - llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy, CGM.Int16Ty}; + /// Build bool __kmpc_kernel_parallel(void **outlined_function); + llvm::Type *TypeParams[] = {CGM.Int8PtrPtrTy}; llvm::Type *RetTy = CGM.getTypes().ConvertType(CGM.getContext().BoolTy); auto *FnTy = llvm::FunctionType::get(RetTy, TypeParams, /*isVarArg*/ false); @@ -2569,7 +2565,7 @@ void CGOpenMPRuntimeNVPTX::emitNonSPMDParallelCall( llvm::Value *ID = Bld.CreateBitOrPointerCast(WFn, CGM.Int8PtrTy); // 
Prepare for parallel region. Indicate the outlined function. - llvm::Value *Args[] = {ID, /*RequiresOMPRuntime=*/Bld.getInt16(1)}; + llvm::Value *Args[] = {ID}; CGF.EmitRuntimeCall( createNVPTXRuntimeFunction(OMPRTL_NVPTX__kmpc_kernel_prepare_parallel), Args); diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp index 2ee6bd2b4701c..1372246c7fc8c 100644 --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -55,7 +55,7 @@ void test_ds(){ // CK1: [[A:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 0 // CK1: [[B:%.+]] = getelementptr inbounds %struct._globalized_locals_ty, %struct._globalized_locals_ty* [[GLOBALSTACK2]], i32 0, i32 1 // CK1: store i32 10, i32* [[A]] -// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i16 1) +// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}) // CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS1]], i64 1) // CK1: [[SHARGSTMP1:%.+]] = load i8**, i8*** [[SHAREDARGS1]] // CK1: [[SHARGSTMP2:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP1]], i64 0 @@ -65,7 +65,7 @@ void test_ds(){ // CK1: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CK1: call void @__kmpc_end_sharing_variables() // CK1: store i32 100, i32* [[B]] -// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}, i16 1) +// CK1: call void @__kmpc_kernel_prepare_parallel({{.*}}) // CK1: call void @__kmpc_begin_sharing_variables(i8*** [[SHAREDARGS2]], i64 2) // CK1: [[SHARGSTMP3:%.+]] = load i8**, i8*** [[SHAREDARGS2]] // CK1: [[SHARGSTMP4:%.+]] = getelementptr inbounds i8*, i8** [[SHARGSTMP3]], i64 0 diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index c8b15c8f6e3ba..ad25e0d775d12 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -92,7 +92,7 @@ int 
bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) #[[#CONVERGENT:]] -// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]] +// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -166,13 +166,13 @@ int bar(int n){ // CHECK-DAG: [[MWS:%.+]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK: [[MTMP1:%.+]] = sub nuw i32 [[MNTH]], [[MWS]] // CHECK: call void @__kmpc_kernel_init(i32 [[MTMP1]] -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*), +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN1]]_wrapper to i8*)) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: call void @__kmpc_serialized_parallel( // CHECK: {{call|invoke}} void [[PARALLEL_FN3:@.+]]( // CHECK: call void @__kmpc_end_serialized_parallel( -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN2]]_wrapper to i8*), +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN2]]_wrapper to i8*)) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK-64-DAG: load i32, i32* [[REF_A]] @@ -211,7 +211,7 @@ int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], +// CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // 
store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -291,7 +291,7 @@ int bar(int n){ // CHECK: br i1 [[CMP]], label {{%?}}[[IF_THEN:.+]], label {{%?}}[[IF_ELSE:.+]] // // CHECK: [[IF_THEN]] -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN4]]_wrapper to i8*), +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* [[PARALLEL_FN4]]_wrapper to i8*)) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) // CHECK: br label {{%?}}[[IF_END:.+]] diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index 91f31185d8c1a..56f04cb01f0aa 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -612,7 +612,7 @@ int baz(int f, double &a) { // CHECK: call void @__kmpc_end_serialized_parallel(%struct.ident_t* [[UNKNOWN]], i32 [[GTID]]) // CHECK: br label -// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1) +// CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*)) // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_PTR:%.+]], i{{64|32}} 2) // CHECK: [[SHARED:%.+]] = load i8**, i8*** [[SHARED_PTR]], // CHECK: [[REF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED]], i{{64|32}} 0 diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp index 3ab955fa85080..8ff393f074e4a 100644 --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -68,7 +68,7 @@ int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** 
[[OMP_WORK_FN]], i16 1) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], @@ -154,7 +154,7 @@ int bar(int n){ // // CHECK: [[AWAIT_WORK]] // CHECK: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) - // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]], i16 1) + // CHECK: [[KPR:%.+]] = call i1 @__kmpc_kernel_parallel(i8** [[OMP_WORK_FN]]) // CHECK: [[KPRB:%.+]] = zext i1 [[KPR]] to i8 // store i8 [[KPRB]], i8* [[OMP_EXEC_STATUS]], align 1 // CHECK: [[WORK:%.+]] = load i8*, i8** [[OMP_WORK_FN]], diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp index fe294bbddf2b7..4f23f18730cc2 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -88,7 +88,7 @@ int bar(int n){ // CHECK: [[I_ADDR:%.+]] = getelementptr inbounds [[GLOB_TY]], [[GLOB_TY]]* [[RD]], i32 0, i32 0 // // CHECK: call void @__kmpc_for_static_init_4( - // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*), i16 1) + // CHECK: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void (i16, i32)* @{{.+}} to i8*)) // CHECK: call void @__kmpc_begin_sharing_variables(i8*** [[SHARED_VARS_PTR:%.+]], i{{64|32}} 1) // CHECK: [[SHARED_VARS_BUF:%.+]] = load i8**, i8*** [[SHARED_VARS_PTR]], // CHECK: [[VARS_BUF:%.+]] = getelementptr inbounds i8*, i8** [[SHARED_VARS_BUF]], i{{64|32}} 0 diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index f286403e657c9..bf799a781ae17 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -584,6 +584,11 @@ 
__OMP_RTL(__tgt_push_mapper_component, false, Void, VoidPtr, VoidPtr, VoidPtr, __OMP_RTL(__kmpc_task_allow_completion_event, false, VoidPtr, IdentPtr, /* Int */ Int32, /* kmp_task_t */ VoidPtr) +/// Note that device runtime functions (in the following) do not necessarily +/// need attributes as we expect to see the definitions. +__OMP_RTL(__kmpc_kernel_parallel, false, Int1, VoidPtrPtr) +__OMP_RTL(__kmpc_kernel_prepare_parallel, false, Void, VoidPtr) + __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL diff --git a/openmp/libomptarget/deviceRTLs/common/src/parallel.cu b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu index 4f3c3ac0c08a6..20b03e9bab1b4 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/parallel.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/parallel.cu @@ -72,10 +72,8 @@ INLINE static uint16_t determineNumberOfThreads(uint16_t NumThreadsClause, } // This routine is always called by the team master.. -EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, - int16_t IsOMPRuntimeInitialized) { +EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn) { PRINT0(LD_IO, "call to __kmpc_kernel_prepare_parallel\n"); - ASSERT0(LT_FUSSY, IsOMPRuntimeInitialized, "Expected initialized runtime."); omptarget_nvptx_workFn = WorkFn; @@ -120,12 +118,9 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, // returns True if this thread is active, else False. // // Only the worker threads call this routine. -EXTERN bool __kmpc_kernel_parallel(void **WorkFn, - int16_t IsOMPRuntimeInitialized) { +EXTERN bool __kmpc_kernel_parallel(void **WorkFn) { PRINT0(LD_IO | LD_PAR, "call to __kmpc_kernel_parallel\n"); - ASSERT0(LT_FUSSY, IsOMPRuntimeInitialized, "Expected initialized runtime."); - // Work function and arguments for L1 parallel region. 
*WorkFn = omptarget_nvptx_workFn; diff --git a/openmp/libomptarget/deviceRTLs/interface.h b/openmp/libomptarget/deviceRTLs/interface.h index 39ce73cba957c..4d352bc648fa3 100644 --- a/openmp/libomptarget/deviceRTLs/interface.h +++ b/openmp/libomptarget/deviceRTLs/interface.h @@ -424,10 +424,8 @@ EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); -EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, - int16_t IsOMPRuntimeInitialized); -EXTERN bool __kmpc_kernel_parallel(void **WorkFn, - int16_t IsOMPRuntimeInitialized); +EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn); +EXTERN bool __kmpc_kernel_parallel(void **WorkFn); EXTERN void __kmpc_kernel_end_parallel(); EXTERN void __kmpc_data_sharing_init_stack(); From b726c55709a0a5e31a26c8e381544348c5dcd402 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 6 Jul 2020 19:29:23 -0500 Subject: [PATCH 005/771] [OpenMP][NFC] Fix some typos --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 0b2e4f24bd178..d7572bf7dc534 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -210,7 +210,7 @@ struct OMPInformationCache : public InformationCache { } // Remove the to-be-deleted indices in reverse order as prior - // modifcations will not modify the smaller indices. + // modifications will not modify the smaller indices. while (!ToBeDeleted.empty()) { unsigned Idx = ToBeDeleted.pop_back_val(); UV[Idx] = UV.back(); @@ -304,7 +304,7 @@ struct OMPInformationCache : public InformationCache { return true; } - // Helper to collect all uses of the decleration in the UsesMap. 
+ // Helper to collect all uses of the declaration in the UsesMap. unsigned collectUses(RuntimeFunctionInfo &RFI, bool CollectStats = true) { unsigned NumUses = 0; if (!RFI.Declaration) @@ -519,7 +519,7 @@ struct OpenMPOpt { return Changed; } - /// Try to eliminiate runtime calls by reusing existing ones. + /// Try to eliminate runtime calls by reusing existing ones. bool deduplicateRuntimeCalls() { bool Changed = false; @@ -615,7 +615,7 @@ struct OpenMPOpt { return Ident; } - /// Try to eliminiate calls of \p RFI in \p F by reusing an existing one or + /// Try to eliminate calls of \p RFI in \p F by reusing an existing one or /// \p ReplVal if given. bool deduplicateRuntimeCalls(Function &F, OMPInformationCache::RuntimeFunctionInfo &RFI, @@ -789,7 +789,7 @@ struct OpenMPOpt { }); } - /// The underyling module. + /// The underlying module. Module &M; /// The SCC we are operating on. From 54bd3751ceebe6eb67804a1ed8be72943817852f Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 6 Jul 2020 19:30:14 -0500 Subject: [PATCH 006/771] [OpenMP][NFC] Add convenient helper and early exit check --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index d7572bf7dc534..b2e30a4d2b79a 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -161,6 +161,9 @@ struct OMPInformationCache : public InformationCache { /// Clear UsesMap for runtime function. void clearUsesMap() { UsesMap.clear(); } + /// Boolean conversion that is true if the runtime function was found. + operator bool() const { return Declaration; } + /// Return the vector of uses in function \p F. UseVector &getOrCreateUseVector(Function *F) { std::shared_ptr &UV = UsesMap[F]; @@ -411,6 +414,9 @@ struct OpenMPOpt { /// Run all OpenMP optimizations on the underlying SCC/ModuleSlice. 
bool run() { + if (SCC.empty()) + return false; + bool Changed = false; LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size() From e8039ad4def0c4a2499cfbaba38bcc8ef48dee92 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 6 Jul 2020 19:19:12 -0500 Subject: [PATCH 007/771] [OpenMP] Identify GPU kernels (aka. OpenMP target regions) We now identify GPU kernels, that is entry points into the GPU code. These kernels (can) correspond to OpenMP target regions. With this patch we identify and on request print them via remarks. Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D83269 --- llvm/include/llvm/Transforms/IPO/OpenMPOpt.h | 12 ++ llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 127 +++++++++++++----- .../OpenMP/gpu_kernel_detection_remarks.ll | 27 ++++ 3 files changed, 133 insertions(+), 33 deletions(-) create mode 100644 llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll diff --git a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h index 0bd81ea8f5431..d96187b73f9bb 100644 --- a/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h +++ b/llvm/include/llvm/Transforms/IPO/OpenMPOpt.h @@ -17,6 +17,9 @@ namespace llvm { namespace omp { +/// Summary of a kernel (=entry point for target offloading). +using Kernel = Function *; + /// Helper to remember if the module contains OpenMP (runtime calls), to be used /// foremost with containsOpenMP. struct OpenMPInModule { @@ -30,8 +33,17 @@ struct OpenMPInModule { bool isKnown() { return Value != OpenMP::UNKNOWN; } operator bool() { return Value != OpenMP::NOT_FOUND; } + /// Return the known kernels (=GPU entry points) in the module. + SmallPtrSetImpl &getKernels() { return Kernels; } + + /// Identify kernels in the module and populate the Kernels set. + void identifyKernels(Module &M); + private: enum class OpenMP { FOUND, NOT_FOUND, UNKNOWN } Value = OpenMP::UNKNOWN; + + /// Collection of known kernels (=GPU entry points) in the module. 
+ SmallPtrSet Kernels; }; /// Helper to determine if \p M contains OpenMP (runtime calls). diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index b2e30a4d2b79a..f0fc8a6c8c4ac 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -39,6 +39,8 @@ static cl::opt DisableOpenMPOptimizations( static cl::opt PrintICVValues("openmp-print-icv-values", cl::init(false), cl::Hidden); +static cl::opt PrintOpenMPKernels("openmp-print-gpu-kernels", + cl::init(false), cl::Hidden); STATISTIC(NumOpenMPRuntimeCallsDeduplicated, "Number of OpenMP runtime calls deduplicated"); @@ -48,6 +50,8 @@ STATISTIC(NumOpenMPRuntimeFunctionsIdentified, "Number of OpenMP runtime functions identified"); STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, "Number of OpenMP runtime function uses identified"); +STATISTIC(NumOpenMPTargetRegionKernels, + "Number of OpenMP target region entry points (=kernels) identified"); #if !defined(NDEBUG) static constexpr auto TAG = "[" DEBUG_TYPE "]"; @@ -99,9 +103,10 @@ struct AAICVTracker; struct OMPInformationCache : public InformationCache { OMPInformationCache(Module &M, AnalysisGetter &AG, BumpPtrAllocator &Allocator, SetVector *CGSCC, - SmallPtrSetImpl &ModuleSlice) + SmallPtrSetImpl &ModuleSlice, + SmallPtrSetImpl &Kernels) : InformationCache(M, AG, Allocator, CGSCC), ModuleSlice(ModuleSlice), - OMPBuilder(M) { + OMPBuilder(M), Kernels(Kernels) { OMPBuilder.initialize(); initializeRuntimeFunctions(); initializeInternalControlVars(); @@ -399,6 +404,9 @@ struct OMPInformationCache : public InformationCache { // TODO: We should attach the attributes defined in OMPKinds.def. } + + /// Collection of known kernels (\see Kernel) in the module. + SmallPtrSetImpl &Kernels; }; struct OpenMPOpt { @@ -423,26 +431,10 @@ struct OpenMPOpt { << " functions in a slice with " << OMPInfoCache.ModuleSlice.size() << " functions\n"); - /// Print initial ICV values for testing. 
- /// FIXME: This should be done from the Attributor once it is added. - if (PrintICVValues) { - InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel}; - - for (Function *F : OMPInfoCache.ModuleSlice) { - for (auto ICV : ICVs) { - auto ICVInfo = OMPInfoCache.ICVs[ICV]; - auto Remark = [&](OptimizationRemark OR) { - return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) - << " Value: " - << (ICVInfo.InitValue - ? ICVInfo.InitValue->getValue().toString(10, true) - : "IMPLEMENTATION_DEFINED"); - }; - - emitRemarkOnFunction(F, "OpenMPICVTracker", Remark); - } - } - } + if (PrintICVValues) + printICVs(); + if (PrintOpenMPKernels) + printKernels(); Changed |= runAttributor(); @@ -455,6 +447,42 @@ struct OpenMPOpt { return Changed; } + /// Print initial ICV values for testing. + /// FIXME: This should be done from the Attributor once it is added. + void printICVs() const { + InternalControlVar ICVs[] = {ICV_nthreads, ICV_active_levels, ICV_cancel}; + + for (Function *F : OMPInfoCache.ModuleSlice) { + for (auto ICV : ICVs) { + auto ICVInfo = OMPInfoCache.ICVs[ICV]; + auto Remark = [&](OptimizationRemark OR) { + return OR << "OpenMP ICV " << ore::NV("OpenMPICV", ICVInfo.Name) + << " Value: " + << (ICVInfo.InitValue + ? ICVInfo.InitValue->getValue().toString(10, true) + : "IMPLEMENTATION_DEFINED"); + }; + + emitRemarkOnFunction(F, "OpenMPICVTracker", Remark); + } + } + } + + /// Print OpenMP GPU kernels for testing. + void printKernels() const { + for (Function *F : SCC) { + if (!OMPInfoCache.Kernels.count(F)) + continue; + + auto Remark = [&](OptimizationRemark OR) { + return OR << "OpenMP GPU kernel " + << ore::NV("OpenMPGPUKernel", F->getName()) << "\n"; + }; + + emitRemarkOnFunction(F, "OpenMPGPU", Remark); + } + } + /// Return the call if \p U is a callee use in a regular call. If \p RFI is /// given it has to be the callee or a nullptr is returned. 
static CallInst *getCallIfRegularCall( @@ -775,7 +803,7 @@ struct OpenMPOpt { template > void emitRemark(Instruction *Inst, StringRef RemarkName, - RemarkCallBack &&RemarkCB) { + RemarkCallBack &&RemarkCB) const { Function *F = Inst->getParent()->getParent(); auto &ORE = OREGetter(F); @@ -785,9 +813,10 @@ struct OpenMPOpt { /// Emit a remark on a function. Since only OptimizationRemark is supporting /// this, it can't be made generic. - void emitRemarkOnFunction( - Function *F, StringRef RemarkName, - function_ref &&RemarkCB) { + void + emitRemarkOnFunction(Function *F, StringRef RemarkName, + function_ref + &&RemarkCB) const { auto &ORE = OREGetter(F); ORE.emit([&]() { @@ -1044,7 +1073,8 @@ PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, SetVector Functions(SCC.begin(), SCC.end()); BumpPtrAllocator Allocator; OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, - /*CGSCC*/ &Functions, ModuleSlice); + /*CGSCC*/ &Functions, ModuleSlice, + OMPInModule.getKernels()); Attributor A(Functions, InfoCache, CGUpdater); @@ -1109,9 +1139,9 @@ struct OpenMPOptLegacyPass : public CallGraphSCCPass { AnalysisGetter AG; SetVector Functions(SCC.begin(), SCC.end()); BumpPtrAllocator Allocator; - OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, - Allocator, - /*CGSCC*/ &Functions, ModuleSlice); + OMPInformationCache InfoCache( + *(Functions.back()->getParent()), AG, Allocator, + /*CGSCC*/ &Functions, ModuleSlice, OMPInModule.getKernels()); Attributor A(Functions, InfoCache, CGUpdater); @@ -1125,14 +1155,45 @@ struct OpenMPOptLegacyPass : public CallGraphSCCPass { } // end anonymous namespace +void OpenMPInModule::identifyKernels(Module &M) { + + NamedMDNode *MD = M.getOrInsertNamedMetadata("nvvm.annotations"); + if (!MD) + return; + + for (auto *Op : MD->operands()) { + if (Op->getNumOperands() < 2) + continue; + MDString *KindID = dyn_cast(Op->getOperand(1)); + if (!KindID || KindID->getString() != "kernel") + continue; + + 
Function *KernelFn = + mdconst::dyn_extract_or_null(Op->getOperand(0)); + if (!KernelFn) + continue; + + ++NumOpenMPTargetRegionKernels; + + Kernels.insert(KernelFn); + } +} + bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) { if (OMPInModule.isKnown()) return OMPInModule; - #define OMP_RTL(_Enum, _Name, ...) \ - if (M.getFunction(_Name)) \ - return OMPInModule = true; + else if (M.getFunction(_Name)) OMPInModule = true; #include "llvm/Frontend/OpenMP/OMPKinds.def" + + // Identify kernels once. TODO: We should split the OMPInformationCache into a + // module and an SCC part. The kernel information, among other things, could + // go into the module part. + if (OMPInModule.isKnown() && OMPInModule) { + OMPInModule.identifyKernels(M); + return true; + } + return OMPInModule = false; } diff --git a/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll new file mode 100644 index 0000000000000..ccdf0b981dc2c --- /dev/null +++ b/llvm/test/Transforms/OpenMP/gpu_kernel_detection_remarks.ll @@ -0,0 +1,27 @@ +; RUN: opt -passes=openmpopt -pass-remarks=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel +; RUN: opt -openmpopt -pass-remarks=openmp-opt -openmp-print-gpu-kernels -disable-output < %s 2>&1 | FileCheck %s --implicit-check-not=non_kernel + +; CHECK-DAG: remark: :0:0: OpenMP GPU kernel kernel1 +; CHECK-DAG: remark: :0:0: OpenMP GPU kernel kernel2 + +define void @kernel1() { + ret void +} + +define void @kernel2() { + ret void +} + +define void @non_kernel() { + ret void +} + +; Needed to trigger the openmp-opt pass +declare dso_local void @__kmpc_kernel_prepare_parallel(i8*) + +!nvvm.annotations = !{!2, !0, !1, !3, !1, !2} + +!0 = !{void ()* @kernel1, !"kernel", i32 1} +!1 = !{void ()* @non_kernel, !"non_kernel", i32 1} +!2 = !{null, !"align", i32 1} +!3 = !{void ()* @kernel2, !"kernel", i32 1} From 
624d34afff5de099a6f84e678c81055556c3d42d Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 6 Jul 2020 19:26:01 -0500 Subject: [PATCH 008/771] [OpenMP] Compute a proper module slice for the CGSCCC pass The module slice describes which functions we can analyze and transform while working on an SCC as part of the CGSCC OpenMPOpt pass. So far, we simply restricted it to the SCC. In a follow up we will need to have a bigger scope which is why this patch introduces a proper identification of the module slice. In short, everything that has a transitive reference to a function in the SCC or is transitively referenced by one is fair game. Reviewed By: sstefan1 Differential Revision: https://reviews.llvm.org/D83270 --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 107 +++++++++++++++++++------- 1 file changed, 80 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index f0fc8a6c8c4ac..38647b5eae688 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -57,6 +57,28 @@ STATISTIC(NumOpenMPTargetRegionKernels, static constexpr auto TAG = "[" DEBUG_TYPE "]"; #endif +/// Apply \p CB to all uses of \p F. If \p LookThroughConstantExprUses is +/// true, constant expression users are not given to \p CB but their uses are +/// traversed transitively. +template +static void foreachUse(Function &F, CBTy CB, + bool LookThroughConstantExprUses = true) { + SmallVector Worklist(make_pointer_range(F.uses())); + + for (unsigned idx = 0; idx < Worklist.size(); ++idx) { + Use &U = *Worklist[idx]; + + // Allow use in constant bitcasts and simply look through them. + if (LookThroughConstantExprUses && isa(U.getUser())) { + for (Use &CEU : cast(U.getUser())->uses()) + Worklist.push_back(&CEU); + continue; + } + + CB(U); + } +} + /// Helper struct to store tracked ICV values at specif instructions. 
struct ICVValue { Instruction *Inst; @@ -102,11 +124,12 @@ struct AAICVTracker; /// Attributor runs. struct OMPInformationCache : public InformationCache { OMPInformationCache(Module &M, AnalysisGetter &AG, - BumpPtrAllocator &Allocator, SetVector *CGSCC, - SmallPtrSetImpl &ModuleSlice, + BumpPtrAllocator &Allocator, SetVector &CGSCC, SmallPtrSetImpl &Kernels) - : InformationCache(M, AG, Allocator, CGSCC), ModuleSlice(ModuleSlice), - OMPBuilder(M), Kernels(Kernels) { + : InformationCache(M, AG, Allocator, &CGSCC), OMPBuilder(M), + Kernels(Kernels) { + initializeModuleSlice(CGSCC); + OMPBuilder.initialize(); initializeRuntimeFunctions(); initializeInternalControlVars(); @@ -196,20 +219,20 @@ struct OMPInformationCache : public InformationCache { /// Run the callback \p CB on each use and forget the use if the result is /// true. The callback will be fed the function in which the use was /// encountered as second argument. - void foreachUse(function_ref CB) { - for (auto &It : UsesMap) - foreachUse(CB, It.first, It.second.get()); + void foreachUse(SmallVectorImpl &SCC, + function_ref CB) { + for (Function *F : SCC) + foreachUse(CB, F); } /// Run the callback \p CB on each use within the function \p F and forget /// the use if the result is true. - void foreachUse(function_ref CB, Function *F, - UseVector *Uses = nullptr) { + void foreachUse(function_ref CB, Function *F) { SmallVector ToBeDeleted; ToBeDeleted.clear(); unsigned Idx = 0; - UseVector &UV = Uses ? *Uses : getOrCreateUseVector(F); + UseVector &UV = getOrCreateUseVector(F); for (Use *U : UV) { if (CB(*U, *F)) @@ -232,8 +255,45 @@ struct OMPInformationCache : public InformationCache { DenseMap> UsesMap; }; + /// Initialize the ModuleSlice member based on \p SCC. ModuleSlices contains + /// (a subset of) all functions that we can look at during this SCC traversal. + /// This includes functions (transitively) called from the SCC and the + /// (transitive) callers of SCC functions. 
We also can look at a function if + /// there is a "reference edge", i.e., if the function somehow uses (!=calls) + /// a function in the SCC or a caller of a function in the SCC. + void initializeModuleSlice(SetVector &SCC) { + ModuleSlice.insert(SCC.begin(), SCC.end()); + + SmallPtrSet Seen; + SmallVector Worklist(SCC.begin(), SCC.end()); + while (!Worklist.empty()) { + Function *F = Worklist.pop_back_val(); + ModuleSlice.insert(F); + + for (Instruction &I : instructions(*F)) + if (auto *CB = dyn_cast(&I)) + if (Function *Callee = CB->getCalledFunction()) + if (Seen.insert(Callee).second) + Worklist.push_back(Callee); + } + + Seen.clear(); + Worklist.append(SCC.begin(), SCC.end()); + while (!Worklist.empty()) { + Function *F = Worklist.pop_back_val(); + ModuleSlice.insert(F); + + // Traverse all transitive uses. + foreachUse(*F, [&](Use &U) { + if (auto *UsrI = dyn_cast(U.getUser())) + if (Seen.insert(UsrI->getFunction()).second) + Worklist.push_back(UsrI->getFunction()); + }); + } + } + + /// The slice of the module we are allowed to look at.
- SmallPtrSetImpl &ModuleSlice; + SmallPtrSet ModuleSlice; /// An OpenMP-IR-Builder instance OpenMPIRBuilder OMPBuilder; @@ -548,7 +608,7 @@ struct OpenMPOpt { return true; }; - RFI.foreachUse(DeleteCallCB); + RFI.foreachUse(SCC, DeleteCallCB); return Changed; } @@ -633,7 +693,7 @@ struct OpenMPOpt { /* GlobalOnly */ true, SingleChoice); return false; }; - RFI.foreachUse(CombineIdentStruct); + RFI.foreachUse(SCC, CombineIdentStruct); if (!Ident || !SingleChoice) { // The IRBuilder uses the insertion block to get to the module, this is @@ -733,7 +793,7 @@ struct OpenMPOpt { Changed = true; return true; }; - RFI.foreachUse(ReplaceAndDeleteCB); + RFI.foreachUse(SCC, ReplaceAndDeleteCB); return Changed; } @@ -776,7 +836,7 @@ struct OpenMPOpt { OMPInformationCache::RuntimeFunctionInfo &GlobThreadNumRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_global_thread_num]; - GlobThreadNumRFI.foreachUse([&](Use &U, Function &F) { + GlobThreadNumRFI.foreachUse(SCC, [&](Use &U, Function &F) { if (CallInst *CI = getCallIfRegularCall(U, &GlobThreadNumRFI)) AddUserArgs(*CI); return false; @@ -938,7 +998,7 @@ struct AAICVTrackerFunction : public AAICVTracker { return true; }; - GetterRFI.foreachUse(ReplaceAndDeleteCB); + GetterRFI.foreachUse(ReplaceAndDeleteCB, getAnchorScope()); return Changed; } @@ -1048,12 +1108,9 @@ PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, if (DisableOpenMPOptimizations) return PreservedAnalyses::all(); - SmallPtrSet ModuleSlice; SmallVector SCC; - for (LazyCallGraph::Node &N : C) { + for (LazyCallGraph::Node &N : C) SCC.push_back(&N.getFunction()); - ModuleSlice.insert(SCC.back()); - } if (SCC.empty()) return PreservedAnalyses::all(); @@ -1073,8 +1130,7 @@ PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C, SetVector Functions(SCC.begin(), SCC.end()); BumpPtrAllocator Allocator; OMPInformationCache InfoCache(*(Functions.back()->getParent()), AG, Allocator, - /*CGSCC*/ &Functions, ModuleSlice, - OMPInModule.getKernels()); + /*CGSCC*/ Functions, 
OMPInModule.getKernels()); Attributor A(Functions, InfoCache, CGUpdater); @@ -1112,14 +1168,11 @@ struct OpenMPOptLegacyPass : public CallGraphSCCPass { if (DisableOpenMPOptimizations || skipSCC(CGSCC)) return false; - SmallPtrSet ModuleSlice; SmallVector SCC; for (CallGraphNode *CGN : CGSCC) if (Function *Fn = CGN->getFunction()) - if (!Fn->isDeclaration()) { + if (!Fn->isDeclaration()) SCC.push_back(Fn); - ModuleSlice.insert(Fn); - } if (SCC.empty()) return false; @@ -1141,7 +1194,7 @@ struct OpenMPOptLegacyPass : public CallGraphSCCPass { BumpPtrAllocator Allocator; OMPInformationCache InfoCache( *(Functions.back()->getParent()), AG, Allocator, - /*CGSCC*/ &Functions, ModuleSlice, OMPInModule.getKernels()); + /*CGSCC*/ Functions, OMPInModule.getKernels()); Attributor A(Functions, InfoCache, CGUpdater); From 5b0581aedc2252481462970503d1085dc27e65eb Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Mon, 6 Jul 2020 19:57:37 -0500 Subject: [PATCH 009/771] [OpenMP] Replace function pointer uses in GPU state machine In non-SPMD mode we create a state machine like code to identify the parallel region the GPU worker threads should execute next. The identification uses the parallel region function pointer as that allows it to work even if the kernel (=target region) and the parallel region are in separate TUs. However, taking the address of a function comes with various downsides. With this patch we will identify the most common situation and replace the function pointer use with a dummy global symbol (for identification purposes only). That means, if the parallel region is only called from a single target region (or kernel), we do not use the function pointer of the parallel region to identify it but a new global symbol. Fixes PR46450. 
Reviewed By: JonChesterfield Differential Revision: https://reviews.llvm.org/D83271 --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 164 ++++++++++++++++++ ..._state_machine_function_ptr_replacement.ll | 153 ++++++++++++++++ 2 files changed, 317 insertions(+) create mode 100644 llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 38647b5eae688..4df65f81912ba 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -52,6 +52,9 @@ STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified, "Number of OpenMP runtime function uses identified"); STATISTIC(NumOpenMPTargetRegionKernels, "Number of OpenMP target region entry points (=kernels) identified"); +STATISTIC( + NumOpenMPParallelRegionsReplacedInGPUStateMachine, + "Number of OpenMP parallel regions replaced with ID in GPU state machines"); #if !defined(NDEBUG) static constexpr auto TAG = "[" DEBUG_TYPE "]"; @@ -496,6 +499,8 @@ struct OpenMPOpt { if (PrintOpenMPKernels) printKernels(); + Changed |= rewriteDeviceCodeStateMachine(); + Changed |= runAttributor(); // Recollect uses, in case Attributor deleted any. @@ -849,6 +854,31 @@ struct OpenMPOpt { AddUserArgs(*GTIdArgs[u]); } + /// Kernel (=GPU) optimizations and utility functions + /// + ///{{ + + /// Check if \p F is a kernel, hence entry point for target offloading. + bool isKernel(Function &F) { return OMPInfoCache.Kernels.count(&F); } + + /// Cache to remember the unique kernel for a function. + DenseMap> UniqueKernelMap; + + /// Find the unique kernel that will execute \p F, if any. + Kernel getUniqueKernelFor(Function &F); + + /// Find the unique kernel that will execute \p I, if any. 
+ Kernel getUniqueKernelFor(Instruction &I) { + return getUniqueKernelFor(*I.getFunction()); + } + + /// Rewrite the device (=GPU) code state machine create in non-SPMD mode in + /// the cases we can avoid taking the address of a function. + bool rewriteDeviceCodeStateMachine(); + + /// + ///}} + /// Emit a remark generically /// /// This template function can be used to generically emit a remark. The @@ -930,6 +960,140 @@ struct OpenMPOpt { } }; +Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { + if (!OMPInfoCache.ModuleSlice.count(&F)) + return nullptr; + + // Use a scope to keep the lifetime of the CachedKernel short. + { + Optional &CachedKernel = UniqueKernelMap[&F]; + if (CachedKernel) + return *CachedKernel; + + // TODO: We should use an AA to create an (optimistic and callback + // call-aware) call graph. For now we stick to simple patterns that + // are less powerful, basically the worst fixpoint. + if (isKernel(F)) { + CachedKernel = Kernel(&F); + return *CachedKernel; + } + + CachedKernel = nullptr; + if (!F.hasLocalLinkage()) + return nullptr; + } + + auto GetUniqueKernelForUse = [&](const Use &U) -> Kernel { + if (auto *Cmp = dyn_cast(U.getUser())) { + // Allow use in equality comparisons. + if (Cmp->isEquality()) + return getUniqueKernelFor(*Cmp); + return nullptr; + } + if (auto *CB = dyn_cast(U.getUser())) { + // Allow direct calls. + if (CB->isCallee(&U)) + return getUniqueKernelFor(*CB); + // Allow the use in __kmpc_kernel_prepare_parallel calls. + if (Function *Callee = CB->getCalledFunction()) + if (Callee->getName() == "__kmpc_kernel_prepare_parallel") + return getUniqueKernelFor(*CB); + return nullptr; + } + // Disallow every other use. + return nullptr; + }; + + // TODO: In the future we want to track more than just a unique kernel. 
+ SmallPtrSet PotentialKernels; + foreachUse(F, [&](const Use &U) { + PotentialKernels.insert(GetUniqueKernelForUse(U)); + }); + + Kernel K = nullptr; + if (PotentialKernels.size() == 1) + K = *PotentialKernels.begin(); + + // Cache the result. + UniqueKernelMap[&F] = K; + + return K; +} + +bool OpenMPOpt::rewriteDeviceCodeStateMachine() { + constexpr unsigned KMPC_KERNEL_PARALLEL_WORK_FN_PTR_ARG_NO = 0; + + OMPInformationCache::RuntimeFunctionInfo &KernelPrepareParallelRFI = + OMPInfoCache.RFIs[OMPRTL___kmpc_kernel_prepare_parallel]; + + bool Changed = false; + if (!KernelPrepareParallelRFI) + return Changed; + + for (Function *F : SCC) { + + // Check if the function is uses in a __kmpc_kernel_prepare_parallel call at + // all. + bool UnknownUse = false; + unsigned NumDirectCalls = 0; + + SmallVector ToBeReplacedStateMachineUses; + foreachUse(*F, [&](Use &U) { + if (auto *CB = dyn_cast(U.getUser())) + if (CB->isCallee(&U)) { + ++NumDirectCalls; + return; + } + + if (auto *Cmp = dyn_cast(U.getUser())) { + ToBeReplacedStateMachineUses.push_back(&U); + return; + } + if (CallInst *CI = OpenMPOpt::getCallIfRegularCall( + *U.getUser(), &KernelPrepareParallelRFI)) { + ToBeReplacedStateMachineUses.push_back(&U); + return; + } + UnknownUse = true; + }); + + // If this ever hits, we should investigate. + if (UnknownUse || NumDirectCalls != 1) + continue; + + // TODO: This is not a necessary restriction and should be lifted. + if (ToBeReplacedStateMachineUses.size() != 2) + continue; + + // Even if we have __kmpc_kernel_prepare_parallel calls, we (for now) give + // up if the function is not called from a unique kernel. + Kernel K = getUniqueKernelFor(*F); + if (!K) + continue; + + // We now know F is a parallel body function called only from the kernel K. + // We also identified the state machine uses in which we replace the + // function pointer by a new global symbol for identification purposes. This + // ensures only direct calls to the function are left. 
+ + Module &M = *F->getParent(); + Type *Int8Ty = Type::getInt8Ty(M.getContext()); + + auto *ID = new GlobalVariable( + M, Int8Ty, /* isConstant */ true, GlobalValue::PrivateLinkage, + UndefValue::get(Int8Ty), F->getName() + ".ID"); + + for (Use *U : ToBeReplacedStateMachineUses) + U->set(ConstantExpr::getBitCast(ID, U->get()->getType())); + + ++NumOpenMPParallelRegionsReplacedInGPUStateMachine; + + Changed = true; + } + + return Changed; +} + /// Abstract Attribute for tracking ICV values. struct AAICVTracker : public StateWrapper { using Base = StateWrapper; diff --git a/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll new file mode 100644 index 0000000000000..0a8d7a9d231ae --- /dev/null +++ b/llvm/test/Transforms/OpenMP/gpu_state_machine_function_ptr_replacement.ll @@ -0,0 +1,153 @@ +; RUN: opt -S -passes=openmpopt -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s +; RUN: opt -S -openmpopt -pass-remarks=openmp-opt -openmp-print-gpu-kernels < %s | FileCheck %s + +; C input used for this test: + +; void bar(void) { +; #pragma omp parallel +; { } +; } +; void foo(void) { +; #pragma omp target teams +; { +; #pragma omp parallel +; {} +; bar(); +; #pragma omp parallel +; {} +; } +; } + +; Verify we replace the function pointer uses for the first and last outlined +; region (1 and 3) but not for the middle one (2) because it could be called from +; another kernel. 
+ +; CHECK-DAG: @__omp_outlined__1_wrapper.ID = private constant i8 undef +; CHECK-DAG: @__omp_outlined__3_wrapper.ID = private constant i8 undef + +; CHECK-DAG: icmp eq i8* %5, @__omp_outlined__1_wrapper.ID +; CHECK-DAG: icmp eq i8* %7, @__omp_outlined__3_wrapper.ID + +; CHECK-DAG: call void @__kmpc_kernel_prepare_parallel(i8* @__omp_outlined__1_wrapper.ID) +; CHECK-DAG: call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__2_wrapper to i8*)) +; CHECK-DAG: call void @__kmpc_kernel_prepare_parallel(i8* @__omp_outlined__3_wrapper.ID) + + +%struct.ident_t = type { i32, i32, i32, i32, i8* } + +define internal void @__omp_offloading_35_a1e179_foo_l7_worker() { +entry: + %work_fn = alloca i8*, align 8 + %exec_status = alloca i8, align 1 + store i8* null, i8** %work_fn, align 8 + store i8 0, i8* %exec_status, align 1 + br label %.await.work + +.await.work: ; preds = %.barrier.parallel, %entry + call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) + %0 = call i1 @__kmpc_kernel_parallel(i8** %work_fn) + %1 = zext i1 %0 to i8 + store i8 %1, i8* %exec_status, align 1 + %2 = load i8*, i8** %work_fn, align 8 + %should_terminate = icmp eq i8* %2, null + br i1 %should_terminate, label %.exit, label %.select.workers + +.select.workers: ; preds = %.await.work + %3 = load i8, i8* %exec_status, align 1 + %is_active = icmp ne i8 %3, 0 + br i1 %is_active, label %.execute.parallel, label %.barrier.parallel + +.execute.parallel: ; preds = %.select.workers + %4 = call i32 @__kmpc_global_thread_num(%struct.ident_t* null) + %5 = load i8*, i8** %work_fn, align 8 + %work_match = icmp eq i8* %5, bitcast (void ()* @__omp_outlined__1_wrapper to i8*) + br i1 %work_match, label %.execute.fn, label %.check.next + +.execute.fn: ; preds = %.execute.parallel + call void @__omp_outlined__1_wrapper() + br label %.terminate.parallel + +.check.next: ; preds = %.execute.parallel + %6 = load i8*, i8** %work_fn, align 8 + %work_match1 = icmp eq i8* %6, bitcast (void 
()* @__omp_outlined__2_wrapper to i8*) + br i1 %work_match1, label %.execute.fn2, label %.check.next3 + +.execute.fn2: ; preds = %.check.next + call void @__omp_outlined__2_wrapper() + br label %.terminate.parallel + +.check.next3: ; preds = %.check.next + %7 = load i8*, i8** %work_fn, align 8 + %work_match4 = icmp eq i8* %7, bitcast (void ()* @__omp_outlined__3_wrapper to i8*) + br i1 %work_match4, label %.execute.fn5, label %.check.next6 + +.execute.fn5: ; preds = %.check.next3 + call void @__omp_outlined__3_wrapper() + br label %.terminate.parallel + +.check.next6: ; preds = %.check.next3 + %8 = bitcast i8* %2 to void ()* + call void %8() + br label %.terminate.parallel + +.terminate.parallel: ; preds = %.check.next6, %.execute.fn5, %.execute.fn2, %.execute.fn + call void @__kmpc_kernel_end_parallel() + br label %.barrier.parallel + +.barrier.parallel: ; preds = %.terminate.parallel, %.select.workers + call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) + br label %.await.work + +.exit: ; preds = %.await.work + ret void +} + +define weak void @__omp_offloading_35_a1e179_foo_l7() { + call void @__omp_offloading_35_a1e179_foo_l7_worker() + call void @__omp_outlined__() + ret void +} + +define internal void @__omp_outlined__() { + call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__1_wrapper to i8*)) + call void @bar() + call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__3_wrapper to i8*)) + ret void +} + +define internal void @__omp_outlined__1() { + ret void +} + +define internal void @__omp_outlined__1_wrapper() { + call void @__omp_outlined__1() + ret void +} + +define hidden void @bar() { + call void @__kmpc_kernel_prepare_parallel(i8* bitcast (void ()* @__omp_outlined__2_wrapper to i8*)) + ret void +} + +define internal void @__omp_outlined__2_wrapper() { + ret void +} + +define internal void @__omp_outlined__3_wrapper() { + ret void +} + +declare void 
@__kmpc_kernel_prepare_parallel(i8* %WorkFn) + +declare zeroext i1 @__kmpc_kernel_parallel(i8** nocapture %WorkFn) + +declare void @__kmpc_kernel_end_parallel() + +declare void @__kmpc_barrier_simple_spmd(%struct.ident_t* nocapture readnone %loc_ref, i32 %tid) + +declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture readnone) + + +!nvvm.annotations = !{!0} + +!0 = !{void ()* @__omp_offloading_35_a1e179_foo_l7, !"kernel", i32 1} From c44702bcdf8aa829e28399d0d4ac4bfc5ac4fff1 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 11 Jul 2020 07:17:28 +0000 Subject: [PATCH 010/771] Remove unused variable `KMPC_KERNEL_PARALLEL_WORK_FN_PTR_ARG_NO` (NFC) This fixes a compiler warning. --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 4df65f81912ba..7d93e78357b34 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1021,8 +1021,6 @@ Kernel OpenMPOpt::getUniqueKernelFor(Function &F) { } bool OpenMPOpt::rewriteDeviceCodeStateMachine() { - constexpr unsigned KMPC_KERNEL_PARALLEL_WORK_FN_PTR_ARG_NO = 0; - OMPInformationCache::RuntimeFunctionInfo &KernelPrepareParallelRFI = OMPInfoCache.RFIs[OMPRTL___kmpc_kernel_prepare_parallel]; From dce6bc18c4e1d086182f9faa3f984912566a3c20 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 11 Jul 2020 02:36:07 -0500 Subject: [PATCH 011/771] [OpenMP][FIX] remove unused variable and long if-else chain MSVC throws an error if you use "too many" if-else in a row: `Frontend/OpenMP/OMPKinds.def(570): fatal error C1061: compiler limit: blocks nested too deeply` We work around it now... 
--- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index 7d93e78357b34..f25e95466407d 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1397,9 +1397,17 @@ void OpenMPInModule::identifyKernels(Module &M) { bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) { if (OMPInModule.isKnown()) return OMPInModule; + + // MSVC doesn't like long if-else chains for some reason and instead just + // issues an error. Work around it.. + do { #define OMP_RTL(_Enum, _Name, ...) \ - else if (M.getFunction(_Name)) OMPInModule = true; + if (M.getFunction(_Name)) { \ + OMPInModule = true; \ + break; \ + } #include "llvm/Frontend/OpenMP/OMPKinds.def" + } while (false); // Identify kernels once. TODO: We should split the OMPInformationCache into a // module and an SCC part. The kernel information, among other things, could From c3bdc9814d947946bf8e1062f6bf41b7f8813f80 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Sat, 11 Jul 2020 10:10:59 +0100 Subject: [PATCH 012/771] [clang-tidy] Reworked enum options handling(again) Reland b9306fd after fixing the issue causing mac builds to fail unittests. Following on from D77085, I was never happy with the passing a mapping to the option get/store functions. This patch addresses this by using explicit specializations to handle the serializing and deserializing of enum options. 
Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D82188 --- .../clang-tidy/ClangTidyCheck.cpp | 28 ++++--- clang-tools-extra/clang-tidy/ClangTidyCheck.h | 80 +++++++++++-------- .../abseil/StringFindStartswithCheck.cpp | 4 +- .../cppcoreguidelines/InitVariablesCheck.cpp | 4 +- .../ProBoundsConstantArrayIndexCheck.cpp | 1 - .../clang-tidy/modernize/LoopConvertCheck.cpp | 54 +++++++------ .../modernize/MakeSmartPtrCheck.cpp | 4 +- .../clang-tidy/modernize/PassByValueCheck.cpp | 4 +- .../modernize/ReplaceAutoPtrCheck.cpp | 4 +- .../modernize/ReplaceRandomShuffleCheck.cpp | 4 +- .../performance/MoveConstructorInitCheck.cpp | 4 +- .../TypePromotionInMathFnCheck.cpp | 4 +- .../UnnecessaryValueParamCheck.cpp | 4 +- .../readability/IdentifierNamingCheck.cpp | 34 +++++--- .../readability/IdentifierNamingCheck.h | 6 ++ .../clang-tidy/utils/IncludeSorter.cpp | 13 +-- .../clang-tidy/utils/IncludeSorter.h | 9 ++- .../utils/TransformerClangTidyCheck.cpp | 5 +- .../clang-tidy/ClangTidyOptionsTest.cpp | 51 +++++++----- 19 files changed, 174 insertions(+), 143 deletions(-) diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp index 780a3569afdbe..e149978bcdeaf 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp @@ -161,11 +161,13 @@ void ClangTidyCheck::OptionsView::store(ClangTidyOptions::OptionMap &Options, store(Options, LocalName, llvm::itostr(Value)); } -llvm::Expected ClangTidyCheck::OptionsView::getEnumInt( - StringRef LocalName, ArrayRef> Mapping, - bool CheckGlobal, bool IgnoreCase) { - auto Iter = CheckGlobal ? findPriorityOption(CheckOptions, NamePrefix, LocalName) - : CheckOptions.find((NamePrefix + LocalName).str()); +llvm::Expected +ClangTidyCheck::OptionsView::getEnumInt(StringRef LocalName, + ArrayRef Mapping, + bool CheckGlobal, bool IgnoreCase) { + auto Iter = CheckGlobal + ? 
findPriorityOption(CheckOptions, NamePrefix, LocalName) + : CheckOptions.find((NamePrefix + LocalName).str()); if (Iter == CheckOptions.end()) return llvm::make_error((NamePrefix + LocalName).str()); @@ -174,19 +176,19 @@ llvm::Expected ClangTidyCheck::OptionsView::getEnumInt( unsigned EditDistance = -1; for (const auto &NameAndEnum : Mapping) { if (IgnoreCase) { - if (Value.equals_lower(NameAndEnum.first)) - return NameAndEnum.second; - } else if (Value.equals(NameAndEnum.first)) { - return NameAndEnum.second; - } else if (Value.equals_lower(NameAndEnum.first)) { - Closest = NameAndEnum.first; + if (Value.equals_lower(NameAndEnum.second)) + return NameAndEnum.first; + } else if (Value.equals(NameAndEnum.second)) { + return NameAndEnum.first; + } else if (Value.equals_lower(NameAndEnum.second)) { + Closest = NameAndEnum.second; EditDistance = 0; continue; } - unsigned Distance = Value.edit_distance(NameAndEnum.first); + unsigned Distance = Value.edit_distance(NameAndEnum.second); if (Distance < EditDistance) { EditDistance = Distance; - Closest = NameAndEnum.first; + Closest = NameAndEnum.second; } } if (EditDistance < 3) diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.h b/clang-tools-extra/clang-tidy/ClangTidyCheck.h index dfe01a8aaa30f..3c625ee0cb796 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyCheck.h +++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.h @@ -26,6 +26,13 @@ class SourceManager; namespace tidy { +/// This class should be specialized by any enum type that needs to be converted +/// to and from an \ref llvm::StringRef. +template struct OptionEnumMapping { + // Specializations of this struct must implement this function. 
+ static ArrayRef> getEnumMapping() = delete; +}; + template class OptionError : public llvm::ErrorInfo { std::error_code convertToErrorCode() const override { return llvm::inconvertibleErrorCode(); @@ -312,36 +319,38 @@ class ClangTidyCheck : public ast_matchers::MatchFinder::MatchCallback { } /// Read a named option from the ``Context`` and parse it as an - /// enum type ``T`` using the \p Mapping provided. If \p IgnoreCase is set, - /// it will search the mapping ignoring the case. + /// enum type ``T``. /// /// Reads the option with the check-local name \p LocalName from the /// ``CheckOptions``. If the corresponding key is not present, returns a /// ``MissingOptionError``. If the key can't be parsed as a ``T`` returns a /// ``UnparseableEnumOptionError``. + /// + /// \ref clang::tidy::OptionEnumMapping must be specialized for ``T`` to + /// supply the mapping required to convert between ``T`` and a string. template std::enable_if_t::value, llvm::Expected> - get(StringRef LocalName, ArrayRef> Mapping, - bool IgnoreCase = false) { - if (llvm::Expected ValueOr = getEnumInt( - LocalName, typeEraseMapping(Mapping), false, IgnoreCase)) + get(StringRef LocalName, bool IgnoreCase = false) { + if (llvm::Expected ValueOr = + getEnumInt(LocalName, typeEraseMapping(), false, IgnoreCase)) return static_cast(*ValueOr); else return std::move(ValueOr.takeError()); } /// Read a named option from the ``Context`` and parse it as an - /// enum type ``T`` using the \p Mapping provided. If \p IgnoreCase is set, - /// it will search the mapping ignoring the case. + /// enum type ``T``. /// /// Reads the option with the check-local name \p LocalName from the /// ``CheckOptions``. If the corresponding key is not present or it can't be /// parsed as a ``T``, returns \p Default. + /// + /// \ref clang::tidy::OptionEnumMapping must be specialized for ``T`` to + /// supply the mapping required to convert between ``T`` and a string. 
template std::enable_if_t::value, T> - get(StringRef LocalName, ArrayRef> Mapping, - T Default, bool IgnoreCase = false) { - if (auto ValueOr = get(LocalName, Mapping, IgnoreCase)) + get(StringRef LocalName, T Default, bool IgnoreCase = false) { + if (auto ValueOr = get(LocalName, IgnoreCase)) return *ValueOr; else logErrToStdErr(ValueOr.takeError()); @@ -349,40 +358,41 @@ class ClangTidyCheck : public ast_matchers::MatchFinder::MatchCallback { } /// Read a named option from the ``Context`` and parse it as an - /// enum type ``T`` using the \p Mapping provided. If \p IgnoreCase is set, - /// it will search the mapping ignoring the case. + /// enum type ``T``. /// /// Reads the option with the check-local name \p LocalName from local or /// global ``CheckOptions``. Gets local option first. If local is not /// present, falls back to get global option. If global option is not /// present either, returns a ``MissingOptionError``. If the key can't be /// parsed as a ``T`` returns a ``UnparseableEnumOptionError``. + /// + /// \ref clang::tidy::OptionEnumMapping must be specialized for ``T`` to + /// supply the mapping required to convert between ``T`` and a string. template std::enable_if_t::value, llvm::Expected> getLocalOrGlobal(StringRef LocalName, - ArrayRef> Mapping, bool IgnoreCase = false) { - if (llvm::Expected ValueOr = getEnumInt( - LocalName, typeEraseMapping(Mapping), true, IgnoreCase)) + if (llvm::Expected ValueOr = + getEnumInt(LocalName, typeEraseMapping(), true, IgnoreCase)) return static_cast(*ValueOr); else return std::move(ValueOr.takeError()); } /// Read a named option from the ``Context`` and parse it as an - /// enum type ``T`` using the \p Mapping provided. If \p IgnoreCase is set, - /// it will search the mapping ignoring the case. + /// enum type ``T``. /// /// Reads the option with the check-local name \p LocalName from local or /// global ``CheckOptions``. Gets local option first. If local is not /// present, falls back to get global option. 
If global option is not /// present either or it can't be parsed as a ``T``, returns \p Default. + /// + /// \ref clang::tidy::OptionEnumMapping must be specialized for ``T`` to + /// supply the mapping required to convert between ``T`` and a string. template std::enable_if_t::value, T> - getLocalOrGlobal(StringRef LocalName, - ArrayRef> Mapping, T Default, - bool IgnoreCase = false) { - if (auto ValueOr = getLocalOrGlobal(LocalName, Mapping, IgnoreCase)) + getLocalOrGlobal(StringRef LocalName, T Default, bool IgnoreCase = false) { + if (auto ValueOr = getLocalOrGlobal(LocalName, IgnoreCase)) return *ValueOr; else logErrToStdErr(ValueOr.takeError()); @@ -400,21 +410,25 @@ class ClangTidyCheck : public ast_matchers::MatchFinder::MatchCallback { int64_t Value) const; /// Stores an option with the check-local name \p LocalName as the string - /// representation of the Enum \p Value using the \p Mapping to \p Options. + /// representation of the Enum \p Value to \p Options. + /// + /// \ref clang::tidy::OptionEnumMapping must be specialized for ``T`` to + /// supply the mapping required to convert between ``T`` and a string. 
template std::enable_if_t::value> - store(ClangTidyOptions::OptionMap &Options, StringRef LocalName, T Value, - ArrayRef> Mapping) { + store(ClangTidyOptions::OptionMap &Options, StringRef LocalName, T Value) { + ArrayRef> Mapping = + OptionEnumMapping::getEnumMapping(); auto Iter = llvm::find_if( - Mapping, [&](const std::pair &NameAndEnum) { - return NameAndEnum.second == Value; + Mapping, [&](const std::pair &NameAndEnum) { + return NameAndEnum.first == Value; }); assert(Iter != Mapping.end() && "Unknown Case Value"); - store(Options, LocalName, Iter->first); + store(Options, LocalName, Iter->second); } private: - using NameAndValue = std::pair; + using NameAndValue = std::pair; llvm::Expected getEnumInt(StringRef LocalName, ArrayRef Mapping, @@ -422,12 +436,14 @@ class ClangTidyCheck : public ast_matchers::MatchFinder::MatchCallback { template std::enable_if_t::value, std::vector> - typeEraseMapping(ArrayRef> Mapping) { + typeEraseMapping() { + ArrayRef> Mapping = + OptionEnumMapping::getEnumMapping(); std::vector Result; Result.reserve(Mapping.size()); for (auto &MappedItem : Mapping) { - Result.emplace_back(MappedItem.first, - static_cast(MappedItem.second)); + Result.emplace_back(static_cast(MappedItem.first), + MappedItem.second); } return Result; } diff --git a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp index df4dbd5ff180d..11bbcbcb527f5 100644 --- a/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp +++ b/clang-tools-extra/clang-tidy/abseil/StringFindStartswithCheck.cpp @@ -27,7 +27,6 @@ StringFindStartswithCheck::StringFindStartswithCheck(StringRef Name, StringLikeClasses(utils::options::parseStringList( Options.get("StringLikeClasses", "::std::basic_string"))), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)), AbseilStringsMatchHeader( Options.get("AbseilStringsMatchHeader", 
"absl/strings/match.h")) {} @@ -122,8 +121,7 @@ void StringFindStartswithCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "StringLikeClasses", utils::options::serializeStringList(StringLikeClasses)); - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); Options.store(Opts, "AbseilStringsMatchHeader", AbseilStringsMatchHeader); } diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp index 2be3bc4ab3cd1..f1755d3f9b855 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/InitVariablesCheck.cpp @@ -27,13 +27,11 @@ InitVariablesCheck::InitVariablesCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)), MathHeader(Options.get("MathHeader", "math.h")) {} void InitVariablesCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); Options.store(Opts, "MathHeader", MathHeader); } diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp index b48511287f883..dd0bedd742a40 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/ProBoundsConstantArrayIndexCheck.cpp @@ -22,7 +22,6 @@ ProBoundsConstantArrayIndexCheck::ProBoundsConstantArrayIndexCheck( StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), GslHeader(Options.get("GslHeader", "")), 
IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)) {} void ProBoundsConstantArrayIndexCheck::storeOptions( diff --git a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp index 215ba341f21fd..b90af1521baf5 100644 --- a/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/LoopConvertCheck.cpp @@ -28,6 +28,31 @@ using namespace llvm; namespace clang { namespace tidy { + +template <> struct OptionEnumMapping { + static llvm::ArrayRef> + getEnumMapping() { + static constexpr std::pair + Mapping[] = {{modernize::Confidence::CL_Reasonable, "reasonable"}, + {modernize::Confidence::CL_Safe, "safe"}, + {modernize::Confidence::CL_Risky, "risky"}}; + return makeArrayRef(Mapping); + } +}; + +template <> struct OptionEnumMapping { + static llvm::ArrayRef< + std::pair> + getEnumMapping() { + static constexpr std::pair + Mapping[] = {{modernize::VariableNamer::NS_CamelCase, "CamelCase"}, + {modernize::VariableNamer::NS_CamelBack, "camelBack"}, + {modernize::VariableNamer::NS_LowerCase, "lower_case"}, + {modernize::VariableNamer::NS_UpperCase, "UPPER_CASE"}}; + return makeArrayRef(Mapping); + } +}; + namespace modernize { static const char LoopNameArray[] = "forLoopArray"; @@ -44,25 +69,6 @@ static const char EndVarName[] = "endVar"; static const char DerefByValueResultName[] = "derefByValueResult"; static const char DerefByRefResultName[] = "derefByRefResult"; -static ArrayRef> -getConfidenceMapping() { - static constexpr std::pair Mapping[] = { - {"reasonable", Confidence::CL_Reasonable}, - {"safe", Confidence::CL_Safe}, - {"risky", Confidence::CL_Risky}}; - return makeArrayRef(Mapping); -} - -static ArrayRef> -getStyleMapping() { - static constexpr std::pair Mapping[] = - {{"CamelCase", VariableNamer::NS_CamelCase}, - {"camelBack", VariableNamer::NS_CamelBack}, - {"lower_case", 
VariableNamer::NS_LowerCase}, - {"UPPER_CASE", VariableNamer::NS_UpperCase}}; - return makeArrayRef(Mapping); -} - // shared matchers static const TypeMatcher AnyType() { return anything(); } @@ -474,15 +480,13 @@ LoopConvertCheck::RangeDescriptor::RangeDescriptor() LoopConvertCheck::LoopConvertCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), TUInfo(new TUTrackingInfo), MaxCopySize(Options.get("MaxCopySize", 16ULL)), - MinConfidence(Options.get("MinConfidence", getConfidenceMapping(), - Confidence::CL_Reasonable)), - NamingStyle(Options.get("NamingStyle", getStyleMapping(), - VariableNamer::NS_CamelCase)) {} + MinConfidence(Options.get("MinConfidence", Confidence::CL_Reasonable)), + NamingStyle(Options.get("NamingStyle", VariableNamer::NS_CamelCase)) {} void LoopConvertCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { Options.store(Opts, "MaxCopySize", std::to_string(MaxCopySize)); - Options.store(Opts, "MinConfidence", MinConfidence, getConfidenceMapping()); - Options.store(Opts, "NamingStyle", NamingStyle, getStyleMapping()); + Options.store(Opts, "MinConfidence", MinConfidence); + Options.store(Opts, "NamingStyle", NamingStyle); } void LoopConvertCheck::registerMatchers(MatchFinder *Finder) { diff --git a/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp index e34fd7038bb86..c677043946f7f 100644 --- a/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/MakeSmartPtrCheck.cpp @@ -45,7 +45,6 @@ MakeSmartPtrCheck::MakeSmartPtrCheck(StringRef Name, ClangTidyContext *Context, StringRef MakeSmartPtrFunctionName) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)), MakeSmartPtrFunctionHeader( Options.get("MakeSmartPtrFunctionHeader", StdMemoryHeader)), @@ -54,8 +53,7 @@ 
MakeSmartPtrCheck::MakeSmartPtrCheck(StringRef Name, ClangTidyContext *Context, IgnoreMacros(Options.getLocalOrGlobal("IgnoreMacros", true)) {} void MakeSmartPtrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); Options.store(Opts, "MakeSmartPtrFunctionHeader", MakeSmartPtrFunctionHeader); Options.store(Opts, "MakeSmartPtrFunction", MakeSmartPtrFunctionName); Options.store(Opts, "IgnoreMacros", IgnoreMacros); diff --git a/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp b/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp index ed1a1a26bb62b..b6dedfbc2b6eb 100644 --- a/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/PassByValueCheck.cpp @@ -121,13 +121,11 @@ collectParamDecls(const CXXConstructorDecl *Ctor, PassByValueCheck::PassByValueCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)), ValuesOnly(Options.get("ValuesOnly", false)) {} void PassByValueCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); Options.store(Opts, "ValuesOnly", ValuesOnly); } diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.cpp b/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.cpp index 295be200bca6f..f98254dbf7c83 100644 --- a/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ReplaceAutoPtrCheck.cpp @@ -75,12 +75,10 @@ ReplaceAutoPtrCheck::ReplaceAutoPtrCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), 
IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)) {} void ReplaceAutoPtrCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); } void ReplaceAutoPtrCheck::registerMatchers(MatchFinder *Finder) { diff --git a/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp b/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp index 9cfbd87239dce..66917df3e91d2 100644 --- a/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp +++ b/clang-tools-extra/clang-tidy/modernize/ReplaceRandomShuffleCheck.cpp @@ -24,7 +24,6 @@ ReplaceRandomShuffleCheck::ReplaceRandomShuffleCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)) {} void ReplaceRandomShuffleCheck::registerMatchers(MatchFinder *Finder) { @@ -52,8 +51,7 @@ void ReplaceRandomShuffleCheck::registerPPCallbacks( void ReplaceRandomShuffleCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); } void ReplaceRandomShuffleCheck::check(const MatchFinder::MatchResult &Result) { diff --git a/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp b/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp index d09673fa7f23f..4cbb014867c4d 100644 --- a/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/MoveConstructorInitCheck.cpp @@ -24,7 +24,6 @@ MoveConstructorInitCheck::MoveConstructorInitCheck(StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), 
IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)) {} void MoveConstructorInitCheck::registerMatchers(MatchFinder *Finder) { @@ -97,8 +96,7 @@ void MoveConstructorInitCheck::registerPPCallbacks( } void MoveConstructorInitCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); } } // namespace performance diff --git a/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp b/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp index d08cec1a2c3ca..597445d0fc266 100644 --- a/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/TypePromotionInMathFnCheck.cpp @@ -32,7 +32,6 @@ TypePromotionInMathFnCheck::TypePromotionInMathFnCheck( StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)) {} void TypePromotionInMathFnCheck::registerPPCallbacks( @@ -44,8 +43,7 @@ void TypePromotionInMathFnCheck::registerPPCallbacks( void TypePromotionInMathFnCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); } void TypePromotionInMathFnCheck::registerMatchers(MatchFinder *Finder) { diff --git a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp index 5b5f2ff994783..5de53b1840f12 100644 --- a/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp +++ b/clang-tools-extra/clang-tidy/performance/UnnecessaryValueParamCheck.cpp @@ -69,7 +69,6 @@ 
UnnecessaryValueParamCheck::UnnecessaryValueParamCheck( StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - utils::IncludeSorter::getMapping(), utils::IncludeSorter::IS_LLVM)), AllowedTypes( utils::options::parseStringList(Options.get("AllowedTypes", ""))) {} @@ -181,8 +180,7 @@ void UnnecessaryValueParamCheck::registerPPCallbacks( void UnnecessaryValueParamCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - utils::IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); Options.store(Opts, "AllowedTypes", utils::options::serializeStringList(AllowedTypes)); } diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp index 6e7fcaa4345a5..c885aac89072a 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.cpp @@ -26,6 +26,26 @@ using namespace clang::ast_matchers; namespace clang { namespace tidy { + +llvm::ArrayRef< + std::pair> +OptionEnumMapping< + readability::IdentifierNamingCheck::CaseType>::getEnumMapping() { + static constexpr std::pair + Mapping[] = { + {readability::IdentifierNamingCheck::CT_AnyCase, "aNy_CasE"}, + {readability::IdentifierNamingCheck::CT_LowerCase, "lower_case"}, + {readability::IdentifierNamingCheck::CT_UpperCase, "UPPER_CASE"}, + {readability::IdentifierNamingCheck::CT_CamelBack, "camelBack"}, + {readability::IdentifierNamingCheck::CT_CamelCase, "CamelCase"}, + {readability::IdentifierNamingCheck::CT_CamelSnakeCase, + "Camel_Snake_Case"}, + {readability::IdentifierNamingCheck::CT_CamelSnakeBack, + "camel_Snake_Back"}}; + return llvm::makeArrayRef(Mapping); +} + namespace readability { // clang-format off @@ -99,16 +119,6 @@ static StringRef const StyleNames[] = { #undef NAMING_KEYS // 
clang-format on -static constexpr std::pair - Mapping[] = { - {"aNy_CasE", IdentifierNamingCheck::CT_AnyCase}, - {"lower_case", IdentifierNamingCheck::CT_LowerCase}, - {"UPPER_CASE", IdentifierNamingCheck::CT_UpperCase}, - {"camelBack", IdentifierNamingCheck::CT_CamelBack}, - {"CamelCase", IdentifierNamingCheck::CT_CamelCase}, - {"Camel_Snake_Case", IdentifierNamingCheck::CT_CamelSnakeCase}, - {"camel_Snake_Back", IdentifierNamingCheck::CT_CamelSnakeBack}}; - IdentifierNamingCheck::IdentifierNamingCheck(StringRef Name, ClangTidyContext *Context) : RenamerClangTidyCheck(Name, Context), @@ -117,7 +127,7 @@ IdentifierNamingCheck::IdentifierNamingCheck(StringRef Name, for (auto const &Name : StyleNames) { auto CaseOptional = [&]() -> llvm::Optional { - auto ValueOr = Options.get((Name + "Case").str(), makeArrayRef(Mapping)); + auto ValueOr = Options.get((Name + "Case").str()); if (ValueOr) return *ValueOr; llvm::logAllUnhandledErrors( @@ -148,7 +158,7 @@ void IdentifierNamingCheck::storeOptions(ClangTidyOptions::OptionMap &Opts) { if (NamingStyles[i]) { if (NamingStyles[i]->Case) { Options.store(Opts, (StyleNames[i] + "Case").str(), - *NamingStyles[i]->Case, llvm::makeArrayRef(Mapping)); + *NamingStyles[i]->Case); } Options.store(Opts, (StyleNames[i] + "Prefix").str(), NamingStyles[i]->Prefix); diff --git a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h index 04bf53fe16b56..0f6c77b2c9a86 100644 --- a/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h +++ b/clang-tools-extra/clang-tidy/readability/IdentifierNamingCheck.h @@ -75,6 +75,12 @@ class IdentifierNamingCheck final : public RenamerClangTidyCheck { }; } // namespace readability +template <> +struct OptionEnumMapping { + static llvm::ArrayRef< + std::pair> + getEnumMapping(); +}; } // namespace tidy } // namespace clang diff --git a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp 
b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp index f946b3a1a6f97..c9d018f076e76 100644 --- a/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp +++ b/clang-tools-extra/clang-tidy/utils/IncludeSorter.cpp @@ -175,13 +175,14 @@ Optional IncludeSorter::CreateIncludeInsertion(StringRef FileName, IncludeStmt); } -llvm::ArrayRef> -IncludeSorter::getMapping() { - static constexpr std::pair Mapping[] = - {{"llvm", IS_LLVM}, {"google", IS_Google}}; +} // namespace utils + +llvm::ArrayRef> +OptionEnumMapping::getEnumMapping() { + static constexpr std::pair + Mapping[] = {{utils::IncludeSorter::IS_LLVM, "llvm"}, + {utils::IncludeSorter::IS_Google, "google"}}; return makeArrayRef(Mapping); } - -} // namespace utils } // namespace tidy } // namespace clang diff --git a/clang-tools-extra/clang-tidy/utils/IncludeSorter.h b/clang-tools-extra/clang-tidy/utils/IncludeSorter.h index 7dab2cc536a48..1d8997364e5ce 100644 --- a/clang-tools-extra/clang-tidy/utils/IncludeSorter.h +++ b/clang-tools-extra/clang-tidy/utils/IncludeSorter.h @@ -9,7 +9,7 @@ #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_INCLUDESORTER_H #define LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_INCLUDESORTER_H -#include "../ClangTidy.h" +#include "../ClangTidyCheck.h" #include namespace clang { @@ -25,8 +25,6 @@ class IncludeSorter { /// Supported include styles. enum IncludeStyle { IS_LLVM = 0, IS_Google = 1 }; - static ArrayRef> getMapping(); - /// The classifications of inclusions, in the order they should be sorted. enum IncludeKinds { IK_MainTUInclude = 0, ///< e.g. 
``#include "foo.h"`` when editing foo.cc @@ -66,6 +64,11 @@ class IncludeSorter { }; } // namespace utils + +template <> struct OptionEnumMapping { + static ArrayRef> + getEnumMapping(); +}; } // namespace tidy } // namespace clang #endif // LLVM_CLANG_TOOLS_EXTRA_CLANG_TIDY_INCLUDESORTER_H diff --git a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp index 665fd5140ceb2..03af5dd1565f8 100644 --- a/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/utils/TransformerClangTidyCheck.cpp @@ -33,7 +33,6 @@ TransformerClangTidyCheck::TransformerClangTidyCheck( StringRef Name, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), Rule(MakeRule(getLangOpts(), Options)), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - IncludeSorter::getMapping(), IncludeSorter::IS_LLVM)) { if (Rule) assert(llvm::all_of(Rule->Cases, hasExplanation) && @@ -46,7 +45,6 @@ TransformerClangTidyCheck::TransformerClangTidyCheck(RewriteRule R, ClangTidyContext *Context) : ClangTidyCheck(Name, Context), Rule(std::move(R)), IncludeStyle(Options.getLocalOrGlobal("IncludeStyle", - IncludeSorter::getMapping(), IncludeSorter::IS_LLVM)) { assert(llvm::all_of(Rule->Cases, hasExplanation) && "clang-tidy checks must have an explanation by default;" @@ -112,8 +110,7 @@ void TransformerClangTidyCheck::check( void TransformerClangTidyCheck::storeOptions( ClangTidyOptions::OptionMap &Opts) { - Options.store(Opts, "IncludeStyle", IncludeStyle, - IncludeSorter::getMapping()); + Options.store(Opts, "IncludeStyle", IncludeStyle); } } // namespace utils diff --git a/clang-tools-extra/unittests/clang-tidy/ClangTidyOptionsTest.cpp b/clang-tools-extra/unittests/clang-tidy/ClangTidyOptionsTest.cpp index a089281bf16c4..63f9a06e91bec 100644 --- a/clang-tools-extra/unittests/clang-tidy/ClangTidyOptionsTest.cpp +++ b/clang-tools-extra/unittests/clang-tidy/ClangTidyOptionsTest.cpp @@ 
-6,6 +6,20 @@ namespace clang { namespace tidy { + +enum class Colours { Red, Orange, Yellow, Green, Blue, Indigo, Violet }; + +template <> struct OptionEnumMapping { + static llvm::ArrayRef> getEnumMapping() { + static constexpr std::pair Mapping[] = { + {Colours::Red, "Red"}, {Colours::Orange, "Orange"}, + {Colours::Yellow, "Yellow"}, {Colours::Green, "Green"}, + {Colours::Blue, "Blue"}, {Colours::Indigo, "Indigo"}, + {Colours::Violet, "Violet"}}; + return makeArrayRef(Mapping); + } +}; + namespace test { TEST(ParseLineFilter, EmptyFilter) { @@ -208,16 +222,10 @@ TEST(CheckOptionsValidation, ValidIntOptions) { #undef CHECK_ERROR_INT } +// FIXME: Figure out why this test causes crashes on mac os. +#ifndef __APPLE__ TEST(ValidConfiguration, ValidEnumOptions) { - enum class Colours { Red, Orange, Yellow, Green, Blue, Indigo, Violet }; - static constexpr std::pair Mapping[] = { - {"Red", Colours::Red}, {"Orange", Colours::Orange}, - {"Yellow", Colours::Yellow}, {"Green", Colours::Green}, - {"Blue", Colours::Blue}, {"Indigo", Colours::Indigo}, - {"Violet", Colours::Violet}}; - static const auto Map = makeArrayRef(Mapping); - ClangTidyOptions Options; auto &CheckOptions = Options.CheckOptions; @@ -237,34 +245,37 @@ TEST(ValidConfiguration, ValidEnumOptions) { #define CHECK_ERROR_ENUM(Name, Expected) \ CHECK_ERROR(Name, UnparseableEnumOptionError, Expected) - CHECK_VAL(TestCheck.getLocal("Valid", Map), Colours::Red); - CHECK_VAL(TestCheck.getGlobal("GlobalValid", Map), Colours::Violet); - CHECK_VAL(TestCheck.getLocal("ValidWrongCase", Map, /*IgnoreCase*/ true), - Colours::Red); + CHECK_VAL(TestCheck.getIntLocal("Valid"), Colours::Red); + CHECK_VAL(TestCheck.getIntGlobal("GlobalValid"), Colours::Violet); + CHECK_VAL( - TestCheck.getGlobal("GlobalValidWrongCase", Map, /*IgnoreCase*/ true), - Colours::Violet); - CHECK_ERROR_ENUM(TestCheck.getLocal("Invalid", Map), + TestCheck.getIntLocal("ValidWrongCase", /*IgnoreCase*/ true), + Colours::Red); + 
CHECK_VAL(TestCheck.getIntGlobal("GlobalValidWrongCase", + /*IgnoreCase*/ true), + Colours::Violet); + CHECK_ERROR_ENUM(TestCheck.getIntLocal("Invalid"), "invalid configuration value " "'Scarlet' for option 'test.Invalid'"); - CHECK_ERROR_ENUM(TestCheck.getLocal("ValidWrongCase", Map), + CHECK_ERROR_ENUM(TestCheck.getIntLocal("ValidWrongCase"), "invalid configuration value 'rED' for option " "'test.ValidWrongCase'; did you mean 'Red'?"); - CHECK_ERROR_ENUM(TestCheck.getLocal("NearMiss", Map), + CHECK_ERROR_ENUM(TestCheck.getIntLocal("NearMiss"), "invalid configuration value 'Oragne' for option " "'test.NearMiss'; did you mean 'Orange'?"); - CHECK_ERROR_ENUM(TestCheck.getGlobal("GlobalInvalid", Map), + CHECK_ERROR_ENUM(TestCheck.getIntGlobal("GlobalInvalid"), "invalid configuration value " "'Purple' for option 'GlobalInvalid'"); - CHECK_ERROR_ENUM(TestCheck.getGlobal("GlobalValidWrongCase", Map), + CHECK_ERROR_ENUM(TestCheck.getIntGlobal("GlobalValidWrongCase"), "invalid configuration value 'vIOLET' for option " "'GlobalValidWrongCase'; did you mean 'Violet'?"); - CHECK_ERROR_ENUM(TestCheck.getGlobal("GlobalNearMiss", Map), + CHECK_ERROR_ENUM(TestCheck.getIntGlobal("GlobalNearMiss"), "invalid configuration value 'Yelow' for option " "'GlobalNearMiss'; did you mean 'Yellow'?"); #undef CHECK_ERROR_ENUM } +#endif #undef CHECK_VAL #undef CHECK_ERROR From 4abdcdb45ee22d77dd64a71cb41e967d35361280 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Sat, 11 Jul 2020 10:42:57 +0100 Subject: [PATCH 013/771] Fix gn builds after 943660fd1 --- llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn index 3bf40626fc808..bfc2c7ae51106 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn @@ -7,7 +7,7 @@ 
static_library("OpenMP") { ] public_deps = [ "//llvm/include/llvm/Frontend/OpenMP:public_tablegen" ] sources = [ - "OMPConstants.cpp", + "OMP.cpp", "OMPContext.cpp", "OMPIRBuilder.cpp", ] From 8fb91dfeed1bd1ffdfd31a345e1bf7cf0b7c86e2 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Sat, 11 Jul 2020 10:45:17 +0100 Subject: [PATCH 014/771] Revert "Fix gn builds after 943660fd1" This reverts commit 4abdcdb45ee22d77dd64a71cb41e967d35361280. --- llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn index bfc2c7ae51106..3bf40626fc808 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn @@ -7,7 +7,7 @@ static_library("OpenMP") { ] public_deps = [ "//llvm/include/llvm/Frontend/OpenMP:public_tablegen" ] sources = [ - "OMP.cpp", + "OMPConstants.cpp", "OMPContext.cpp", "OMPIRBuilder.cpp", ] From 35af6f11e04b777b73035f59bfabb68a08ca4ad9 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Sat, 11 Jul 2020 11:42:05 +0100 Subject: [PATCH 015/771] Reland Fix gn build after 943660f --- llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn index 3bf40626fc808..07b265bcb288b 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn @@ -7,7 +7,7 @@ static_library("OpenMP") { ] public_deps = [ "//llvm/include/llvm/Frontend/OpenMP:public_tablegen" ] sources = [ - "OMPConstants.cpp", + "OMP.cpp.inc", "OMPContext.cpp", "OMPIRBuilder.cpp", ] From 09a95f51fb1fb86442418d891f67a43e2a3ca698 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Sat, 11 Jul 2020 06:43:28 -0400 
Subject: [PATCH 016/771] [gn build] (manually) merge 943660fd15f193 --- llvm/lib/Frontend/OpenMP/CMakeLists.txt | 2 +- .../llvm/include/llvm/Frontend/OpenMP/BUILD.gn | 8 -------- .../gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn | 13 +++++++++++-- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/llvm/lib/Frontend/OpenMP/CMakeLists.txt b/llvm/lib/Frontend/OpenMP/CMakeLists.txt index f88e3ed986623..068283fd82e07 100644 --- a/llvm/lib/Frontend/OpenMP/CMakeLists.txt +++ b/llvm/lib/Frontend/OpenMP/CMakeLists.txt @@ -15,4 +15,4 @@ add_llvm_component_library(LLVMFrontendOpenMP intrinsics_gen omp_gen omp_cpp - ) \ No newline at end of file + ) diff --git a/llvm/utils/gn/secondary/llvm/include/llvm/Frontend/OpenMP/BUILD.gn b/llvm/utils/gn/secondary/llvm/include/llvm/Frontend/OpenMP/BUILD.gn index 9942a3647b58e..a18f8db5f5ebd 100644 --- a/llvm/utils/gn/secondary/llvm/include/llvm/Frontend/OpenMP/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/include/llvm/Frontend/OpenMP/BUILD.gn @@ -6,13 +6,6 @@ tablegen("OMP") { output_name = "OMP.h.inc" } -tablegen("OMPImpl") { - visibility = [ ":public_tablegen" ] - args = [ "-gen-directive-impl" ] - td_file = "OMP.td" - output_name = "OMP.cpp.inc" -} - # Groups all tablegen() calls that create .inc files that are included in # Frontent/OpenMP's public headers (just one so far). # //llvm/lib/Frontend/OpenMP has this as a public_dep, so targets depending on @@ -21,6 +14,5 @@ group("public_tablegen") { public_deps = [ # Frontend/OpenMP's public headers include OMP.h.inc. 
":OMP", - ":OMPImpl", ] } diff --git a/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn index 07b265bcb288b..688a25e3c1df1 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Frontend/OpenMP/BUILD.gn @@ -1,14 +1,23 @@ +import("//llvm/utils/TableGen/tablegen.gni") + +tablegen("OMPImpl") { + visibility = [ ":OpenMP" ] + args = [ "-gen-directive-impl" ] + td_file = "//llvm/include/llvm/Frontend/OpenMP/OMP.td" + output_name = "OMP.cpp" +} + static_library("OpenMP") { output_name = "LLVMFrontendOpenMP" deps = [ + ":OMPImpl", "//llvm/lib/IR", "//llvm/lib/Support", "//llvm/lib/Transforms/Utils", ] public_deps = [ "//llvm/include/llvm/Frontend/OpenMP:public_tablegen" ] sources = [ - "OMP.cpp.inc", "OMPContext.cpp", "OMPIRBuilder.cpp", - ] + ] + get_target_outputs(":OMPImpl") } From 4500db8c59621a31c622862a2946457fdee481ce Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 11 Jul 2020 13:52:33 +0300 Subject: [PATCH 017/771] Revert "Reland "[InstCombine] Lower infinite combine loop detection thresholds""" And there's a new hit: https://bugs.llvm.org/show_bug.cgi?id=46680 This reverts commit 7103c87596efccd532e9fe04a6ba6a200fed8481. --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 5 ----- 1 file changed, 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index e810b3de25bc8..d1c1e54188251 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -123,13 +123,8 @@ STATISTIC(NumReassoc , "Number of reassociations"); DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited"); -// FIXME: these limits eventually should be as low as 2. 
static constexpr unsigned InstCombineDefaultMaxIterations = 1000; -#ifndef NDEBUG -static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100; -#else static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000; -#endif static cl::opt EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), From f7907e9d223d8484f9afd457ba614c2db2ae4743 Mon Sep 17 00:00:00 2001 From: Alexey Lapshin Date: Fri, 19 Jun 2020 23:55:05 +0300 Subject: [PATCH 018/771] [TRE] allow TRE for non-capturing calls. The current implementation of Tail Recursion Elimination has a very restricted pre-requisite: AllCallsAreTailCalls. i.e. it requires that no function call receives a pointer to local stack. Generally, function calls that receive a pointer to local stack but do not capture it - should not break TRE. This fix allows us to do TRE if it is proved that no pointer to the local stack is escaped. Reviewed by: efriedma Differential Revision: https://reviews.llvm.org/D82085 --- .../Scalar/TailRecursionElimination.cpp | 113 ++++++---------- llvm/test/Transforms/TailCallElim/basic.ll | 7 +- .../TailCallElim/tre-multiple-exits.ll | 125 ++++++++++++++++++ .../tre-noncapturing-alloca-calls.ll | 74 +++++++++++ 4 files changed, 247 insertions(+), 72 deletions(-) create mode 100644 llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll create mode 100644 llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index 5bb1d54d7d127..bfd312a52ea58 100644 --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -81,6 +81,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "tailcallelim" @@ -92,7 
+93,10 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); /// Scan the specified function for alloca instructions. /// If it contains any dynamic allocas, returns false. static bool canTRE(Function &F) { - // Because of PR962, we don't TRE dynamic allocas. + // TODO: We don't do TRE if dynamic allocas are used. + // Dynamic allocas allocate stack space which should be + // deallocated before new iteration started. That is + // currently not implemented. return llvm::all_of(instructions(F), [](Instruction &I) { auto *AI = dyn_cast(&I); return !AI || AI->isStaticAlloca(); @@ -185,11 +189,9 @@ struct AllocaDerivedValueTracker { }; } -static bool markTails(Function &F, bool &AllCallsAreTailCalls, - OptimizationRemarkEmitter *ORE) { +static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { if (F.callsFunctionThatReturnsTwice()) return false; - AllCallsAreTailCalls = true; // The local stack holds all alloca instructions and all byval arguments. AllocaDerivedValueTracker Tracker; @@ -272,11 +274,8 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls, } } - if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { + if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) DeferredTails.push_back(CI); - } else { - AllCallsAreTailCalls = false; - } } for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) { @@ -313,8 +312,6 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls, LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n"); CI->setTailCall(); Modified = true; - } else { - AllCallsAreTailCalls = false; } } @@ -325,7 +322,16 @@ static bool markTails(Function &F, bool &AllCallsAreTailCalls, /// instruction from after the call to before the call, assuming that all /// instructions between the call and this instruction are movable. 
/// -static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { +static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA, + DenseMap &AllocaForValue) { + if (isa(I)) + return true; + + if (const IntrinsicInst *II = dyn_cast(I)) + if (II->getIntrinsicID() == Intrinsic::lifetime_end && + llvm::findAllocaForValue(II->getArgOperand(1), AllocaForValue)) + return true; + // FIXME: We can move load/store/call/free instructions above the call if the // call does not mod/ref the memory location being processed. if (I->mayHaveSideEffects()) // This also handles volatile loads. @@ -392,7 +398,6 @@ class TailRecursionEliminator { // createTailRecurseLoopHeader the first time we find a call we can eliminate. BasicBlock *HeaderBB = nullptr; SmallVector ArgumentPHIs; - bool RemovableCallsMustBeMarkedTail = false; // PHI node to store our return value. PHINode *RetPN = nullptr; @@ -414,13 +419,15 @@ class TailRecursionEliminator { // The instruction doing the accumulating. Instruction *AccumulatorRecursionInstr = nullptr; + // The cache for pairs. 
+ DenseMap AllocaForValue; + TailRecursionEliminator(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {} - CallInst *findTRECandidate(Instruction *TI, - bool CannotTailCallElimCallsMarkedTail); + CallInst *findTRECandidate(Instruction *TI); void createTailRecurseLoopHeader(CallInst *CI); @@ -428,11 +435,9 @@ class TailRecursionEliminator { bool eliminateCall(CallInst *CI); - bool foldReturnAndProcessPred(ReturnInst *Ret, - bool CannotTailCallElimCallsMarkedTail); + bool foldReturnAndProcessPred(ReturnInst *Ret); - bool processReturningBlock(ReturnInst *Ret, - bool CannotTailCallElimCallsMarkedTail); + bool processReturningBlock(ReturnInst *Ret); void cleanupAndFinalize(); @@ -443,8 +448,7 @@ class TailRecursionEliminator { }; } // namespace -CallInst *TailRecursionEliminator::findTRECandidate( - Instruction *TI, bool CannotTailCallElimCallsMarkedTail) { +CallInst *TailRecursionEliminator::findTRECandidate(Instruction *TI) { BasicBlock *BB = TI->getParent(); if (&BB->front() == TI) // Make sure there is something before the terminator. @@ -464,9 +468,9 @@ CallInst *TailRecursionEliminator::findTRECandidate( --BBI; } - // If this call is marked as a tail call, and if there are dynamic allocas in - // the function, we cannot perform this optimization. - if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) + assert((!CI->isTailCall() || !CI->isNoTailCall()) && + "Incompatible call site attributes(Tail,NoTail)"); + if (!CI->isTailCall()) return nullptr; // As a special case, detect code like this: @@ -498,26 +502,13 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) { BranchInst *BI = BranchInst::Create(HeaderBB, NewEntry); BI->setDebugLoc(CI->getDebugLoc()); - // If this function has self recursive calls in the tail position where some - // are marked tail and some are not, only transform one flavor or another. 
- // We have to choose whether we move allocas in the entry block to the new - // entry block or not, so we can't make a good choice for both. We make this - // decision here based on whether the first call we found to remove is - // marked tail. - // NOTE: We could do slightly better here in the case that the function has - // no entry block allocas. - RemovableCallsMustBeMarkedTail = CI->isTailCall(); - - // If this tail call is marked 'tail' and if there are any allocas in the - // entry block, move them up to the new entry block. - if (RemovableCallsMustBeMarkedTail) - // Move all fixed sized allocas from HeaderBB to NewEntry. - for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), - NEBI = NewEntry->begin(); - OEBI != E;) - if (AllocaInst *AI = dyn_cast(OEBI++)) - if (isa(AI->getArraySize())) - AI->moveBefore(&*NEBI); + // Move all fixed sized allocas from HeaderBB to NewEntry. + for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), + NEBI = NewEntry->begin(); + OEBI != E;) + if (AllocaInst *AI = dyn_cast(OEBI++)) + if (isa(AI->getArraySize())) + AI->moveBefore(&*NEBI); // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. 
@@ -592,7 +583,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { Instruction *AccRecInstr = nullptr; BasicBlock::iterator BBI(CI); for (++BBI; &*BBI != Ret; ++BBI) { - if (canMoveAboveCall(&*BBI, CI, AA)) + if (canMoveAboveCall(&*BBI, CI, AA, AllocaForValue)) continue; // If we can't move the instruction above the call, it might be because it @@ -620,9 +611,6 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { if (!HeaderBB) createTailRecurseLoopHeader(CI); - if (RemovableCallsMustBeMarkedTail && !CI->isTailCall()) - return false; - // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. @@ -672,8 +660,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { return true; } -bool TailRecursionEliminator::foldReturnAndProcessPred( - ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) { +bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) { BasicBlock *BB = Ret->getParent(); bool Change = false; @@ -698,8 +685,7 @@ bool TailRecursionEliminator::foldReturnAndProcessPred( while (!UncondBranchPreds.empty()) { BranchInst *BI = UncondBranchPreds.pop_back_val(); BasicBlock *Pred = BI->getParent(); - if (CallInst *CI = - findTRECandidate(BI, CannotTailCallElimCallsMarkedTail)) { + if (CallInst *CI = findTRECandidate(BI)) { LLVM_DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU); @@ -720,9 +706,8 @@ bool TailRecursionEliminator::foldReturnAndProcessPred( return Change; } -bool TailRecursionEliminator::processReturningBlock( - ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) { - CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail); +bool TailRecursionEliminator::processReturningBlock(ReturnInst *Ret) { + CallInst *CI = findTRECandidate(Ret); if (!CI) return false; @@ -810,35 +795,25 @@ bool 
TailRecursionEliminator::eliminate(Function &F, return false; bool MadeChange = false; - bool AllCallsAreTailCalls = false; - MadeChange |= markTails(F, AllCallsAreTailCalls, ORE); - if (!AllCallsAreTailCalls) - return MadeChange; + MadeChange |= markTails(F, ORE); // If this function is a varargs function, we won't be able to PHI the args // right, so don't even try to convert it... if (F.getFunctionType()->isVarArg()) return MadeChange; - // If false, we cannot perform TRE on tail calls marked with the 'tail' - // attribute, because doing so would cause the stack size to increase (real - // TRE would deallocate variable sized allocas, TRE doesn't). - bool CanTRETailMarkedCall = canTRE(F); + if (!canTRE(F)) + return MadeChange; TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU); // Change any tail recursive calls to loops. - // - // FIXME: The code generator produces really bad code when an 'escaping - // alloca' is changed from being a static alloca to being a dynamic alloca. - // Until this is resolved, disable this transformation if that would ever - // happen. This bug is PR962. for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) { BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB. if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { - bool Change = TRE.processReturningBlock(Ret, !CanTRETailMarkedCall); + bool Change = TRE.processReturningBlock(Ret); if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = TRE.foldReturnAndProcessPred(Ret, !CanTRETailMarkedCall); + Change = TRE.foldReturnAndProcessPred(Ret); MadeChange |= Change; } } diff --git a/llvm/test/Transforms/TailCallElim/basic.ll b/llvm/test/Transforms/TailCallElim/basic.ll index 6116014a024b1..669210da6314b 100644 --- a/llvm/test/Transforms/TailCallElim/basic.ll +++ b/llvm/test/Transforms/TailCallElim/basic.ll @@ -12,15 +12,16 @@ define void @test0() { ret void } -; PR615. Make sure that we do not move the alloca so that it interferes with the tail call. 
+; Make sure that we do not do TRE if pointer to local stack +; escapes through function call. define i32 @test1() { ; CHECK: i32 @test1() ; CHECK-NEXT: alloca %A = alloca i32 ; [#uses=2] store i32 5, i32* %A call void @use(i32* %A) -; CHECK: tail call i32 @test1 - %X = tail call i32 @test1() ; [#uses=1] +; CHECK: call i32 @test1 + %X = call i32 @test1() ; [#uses=1] ret i32 %X } diff --git a/llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll b/llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll new file mode 100644 index 0000000000000..8f69087dd879d --- /dev/null +++ b/llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll @@ -0,0 +1,125 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s + +; This test checks that TRE would be done for only one recursive call. +; The test_multiple_exits function has three recursive calls. +; First recursive call could not be eliminated because there is +; escaped pointer to local variable. Second recursive call could +; be eliminated. Thrid recursive call could not be eliminated since +; this is not last call. Thus, test checks that TRE would be done +; for only second recursive call. + +; IR for that test was generated from the following C++ source: +; +; void capture_arg (int*); +; void test_multiple_exits (int param); +; if (param >= 0 && param < 10) { +; int temp; +; capture_arg(&temp); +; // TRE could not be done because pointer to local +; // variable "temp" is escaped. +; test_multiple_exits(param + 1); +; } else if (param >=10 && param < 20) { +; // TRE should be done. +; test_multiple_exits(param + 1); +; } else if (param >= 20 && param < 22) { +; // TRE could not be done since recursive +; // call is not last call. 
+; test_multiple_exits(param + 1); +; func(); +; } +; +; return; +; } + +; Function Attrs: noinline optnone uwtable +declare void @_Z11capture_argPi(i32* %param) #0 + +; Function Attrs: noinline optnone uwtable +declare void @_Z4funcv() #0 + +; Function Attrs: noinline nounwind uwtable +define dso_local void @_Z19test_multiple_exitsi(i32 %param) local_unnamed_addr #2 { +; CHECK-LABEL: @_Z19test_multiple_exitsi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEMP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[TAILRECURSE:%.*]] +; CHECK: tailrecurse: +; CHECK-NEXT: [[PARAM_TR:%.*]] = phi i32 [ [[PARAM:%.*]], [[ENTRY:%.*]] ], [ [[ADD6:%.*]], [[IF_THEN5:%.*]] ] +; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[PARAM_TR]], 10 +; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] +; CHECK: if.then: +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TEMP]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TMP1]]) #1 +; CHECK-NEXT: call void @_Z11capture_argPi(i32* nonnull [[TEMP]]) +; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[PARAM_TR]], 1 +; CHECK-NEXT: call void @_Z19test_multiple_exitsi(i32 [[ADD]]) +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP1]]) #1 +; CHECK-NEXT: br label [[IF_END14:%.*]] +; CHECK: if.else: +; CHECK-NEXT: [[PARAM_OFF:%.*]] = add i32 [[PARAM_TR]], -10 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[PARAM_OFF]], 10 +; CHECK-NEXT: br i1 [[TMP2]], label [[IF_THEN5]], label [[IF_ELSE7:%.*]] +; CHECK: if.then5: +; CHECK-NEXT: [[ADD6]] = add nuw nsw i32 [[PARAM_TR]], 1 +; CHECK-NEXT: br label [[TAILRECURSE]] +; CHECK: if.else7: +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[PARAM_TR]], -2 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 20 +; CHECK-NEXT: br i1 [[TMP4]], label [[IF_THEN11:%.*]], label [[IF_END14]] +; CHECK: if.then11: +; CHECK-NEXT: [[ADD12:%.*]] = add nsw i32 [[PARAM_TR]], 1 +; CHECK-NEXT: tail call void @_Z19test_multiple_exitsi(i32 [[ADD12]]) +; CHECK-NEXT: tail call void 
@_Z4funcv() +; CHECK-NEXT: ret void +; CHECK: if.end14: +; CHECK-NEXT: ret void +; +entry: + %temp = alloca i32, align 4 + %0 = icmp ult i32 %param, 10 + br i1 %0, label %if.then, label %if.else + +if.then: ; preds = %entry + %1 = bitcast i32* %temp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #2 + call void @_Z11capture_argPi(i32* nonnull %temp) + %add = add nuw nsw i32 %param, 1 + call void @_Z19test_multiple_exitsi(i32 %add) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #2 + br label %if.end14 + +if.else: ; preds = %entry + %param.off = add i32 %param, -10 + %2 = icmp ult i32 %param.off, 10 + br i1 %2, label %if.then5, label %if.else7 + +if.then5: ; preds = %if.else + %add6 = add nuw nsw i32 %param, 1 + call void @_Z19test_multiple_exitsi(i32 %add6) + br label %if.end14 + +if.else7: ; preds = %if.else + %3 = and i32 %param, -2 + %4 = icmp eq i32 %3, 20 + br i1 %4, label %if.then11, label %if.end14 + +if.then11: ; preds = %if.else7 + %add12 = add nsw i32 %param, 1 + call void @_Z19test_multiple_exitsi(i32 %add12) + call void @_Z4funcv() + br label %if.end14 + +if.end14: ; preds = %if.then5, %if.then11, %if.else7, %if.then + ret void +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 + +attributes #0 = { nofree noinline norecurse nounwind uwtable } +attributes #1 = { nounwind uwtable } +attributes #2 = { argmemonly nounwind willreturn } diff --git a/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll b/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll new file mode 100644 index 0000000000000..2168437fc5706 --- /dev/null +++ b/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll @@ -0,0 +1,74 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s 
-tailcallelim -verify-dom-info -S | FileCheck %s + +; IR for that test was generated from the following C++ source: +; +;int count; +;__attribute__((noinline)) void globalIncrement(const int* param) { count += *param; } +; +;void test(int recurseCount) +;{ +; if (recurseCount == 0) return; +; int temp = 10; +; globalIncrement(&temp); +; test(recurseCount - 1); +;} +; + +@count = dso_local local_unnamed_addr global i32 0, align 4 + +; Function Attrs: nofree noinline norecurse nounwind uwtable +declare void @_Z15globalIncrementPKi(i32* nocapture readonly %param) #0 + +; Test that TRE could be done for recursive tail routine containing +; call to function receiving a pointer to local stack. + +; Function Attrs: nounwind uwtable +define dso_local void @_Z4testi(i32 %recurseCount) local_unnamed_addr #1 { +; CHECK-LABEL: @_Z4testi( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TEMP:%.*]] = alloca i32, align 4 +; CHECK-NEXT: br label [[TAILRECURSE:%.*]] +; CHECK: tailrecurse: +; CHECK-NEXT: [[RECURSECOUNT_TR:%.*]] = phi i32 [ [[RECURSECOUNT:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[IF_END:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[RECURSECOUNT_TR]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END]] +; CHECK: if.end: +; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[TEMP]] to i8* +; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TMP0]]) +; CHECK-NEXT: store i32 10, i32* [[TEMP]], align 4 +; CHECK-NEXT: call void @_Z15globalIncrementPKi(i32* nonnull [[TEMP]]) +; CHECK-NEXT: [[SUB]] = add nsw i32 [[RECURSECOUNT_TR]], -1 +; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP0]]) +; CHECK-NEXT: br label [[TAILRECURSE]] +; CHECK: return: +; CHECK-NEXT: ret void +; +entry: + %temp = alloca i32, align 4 + %cmp = icmp eq i32 %recurseCount, 0 + br i1 %cmp, label %return, label %if.end + +if.end: ; preds = %entry + %0 = bitcast i32* %temp to i8* + call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #6 + store i32 10, 
i32* %temp, align 4 + call void @_Z15globalIncrementPKi(i32* nonnull %temp) + %sub = add nsw i32 %recurseCount, -1 + call void @_Z4testi(i32 %sub) + call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #6 + br label %return + +return: ; preds = %entry, %if.end + ret void +} + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 + +; Function Attrs: argmemonly nounwind willreturn +declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 + +attributes #0 = { nofree noinline norecurse nounwind uwtable } +attributes #1 = { nounwind uwtable } +attributes #2 = { argmemonly nounwind willreturn } From d7a05698efcfa6c596bcaadd8d5154612990f8f3 Mon Sep 17 00:00:00 2001 From: Christudasan Devadasan Date: Sat, 11 Jul 2020 00:19:51 +0530 Subject: [PATCH 019/771] [AMDGPU] Move LowerSwitch pass to CodeGenPrepare. It is possible that LowerSwitch pass leaves certain blocks unreachable from the entry. If not removed, these dead blocks can cause undefined behavior in the subsequent passes. It caused a crash in the AMDGPU backend after the instruction selection when a PHI node has its incoming values coming from these unreachable blocks. In the AMDGPU pass flow, the last invocation of UnreachableBlockElim precedes where LowerSwitch is currently placed and eventually missed out on the opportunity to get these blocks eliminated. This patch ensures that LowerSwitch pass get inserted earlier to make use of the existing unreachable block elimination pass. 
Reviewed By: sameerds, arsenm Differential Revision: https://reviews.llvm.org/D83584 --- .../lib/Target/AMDGPU/AMDGPUTargetMachine.cpp | 7 ++- .../switch-default-block-unreachable.ll | 60 +++++++++++++++++++ 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 8604f5005eb2b..b4b10835837cd 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -787,10 +787,15 @@ void AMDGPUPassConfig::addCodeGenPrepare() { if (EnableLoadStoreVectorizer) addPass(createLoadStoreVectorizerPass()); + + // LowerSwitch pass may introduce unreachable blocks that can + // cause unexpected behavior for subsequent passes. Placing it + // here seems better that these blocks would get cleaned up by + // UnreachableBlockElim inserted next in the pass flow. + addPass(createLowerSwitchPass()); } bool AMDGPUPassConfig::addPreISel() { - addPass(createLowerSwitchPass()); addPass(createFlattenCFGPass()); return false; } diff --git a/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll new file mode 100644 index 0000000000000..13c4dc80be156 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/switch-default-block-unreachable.ll @@ -0,0 +1,60 @@ +; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx908 -verify-machineinstrs -stop-after=amdgpu-isel -o - %s | FileCheck -check-prefix=GCN %s +define void @test() #1 { + ; Clean up the unreachable blocks introduced with LowerSwitch pass. + ; This test ensures that, in the pass flow, UnreachableBlockElim pass + ; follows the LowerSwitch. Otherwise, this testcase will crash + ; immediately after the instruction selection due to the incomplete + ; PHI node in an MBB whose incoming values were never codegenerated. 
+ ; + ; GCN-LABEL: name: test + ; GCN: bb.{{[0-9]+}}.entry: + ; GCN: bb.{{[0-9]+}}.entry.true.blk: + ; GCN: bb.{{[0-9]+}}.entry.false.blk: + ; GCN: bb.{{[0-9]+}}.switch.blk: + + ; GCN-NOT: bb.{{[0-9]+}}.preheader.blk + ; GCN-NOT: bb.{{[0-9]+}}.pre.false.blk: + ; GCN-NOT: bb.{{[0-9]+}}.unreach.blk: + ; GCN-NOT: PHI + + ; GCN: bb.{{[0-9]+}}.exit: + entry: + %idx = tail call i32 @llvm.amdgcn.workitem.id.x() #0 + br i1 undef, label %entry.true.blk, label %entry.false.blk + + entry.true.blk: ; preds = %entry + %exit.cmp = icmp ult i32 %idx, 3 + br i1 %exit.cmp, label %switch.blk, label %exit + + entry.false.blk: ; preds = %entry + unreachable + + switch.blk: ; preds = %entry.true.blk + switch i32 %idx, label %preheader.blk [ + i32 0, label %exit + i32 1, label %exit + i32 2, label %exit + ] + + preheader.blk: ; preds = %switch.blk + %pre.exit = icmp ult i32 %idx, 5 + br i1 %pre.exit, label %unreach.blk, label %pre.false.blk + + pre.false.blk: ; preds = %preheader.blk + %call.pre.false = tail call i32 @func(i32 %idx) #0 + br label %unreach.blk + + unreach.blk: ; preds = %preheader.blk, %pre.false.blk + %phi.val = phi i32 [ %call.pre.false, %pre.false.blk ], [ undef, %preheader.blk ] + store i32 %phi.val, i32* undef + unreachable + + exit: ; preds = %switch.blk + ret void +} + +declare i32 @llvm.amdgcn.workitem.id.x() #0 +declare i32 @func(i32)#0 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } From 850b150cff3dfb5f2113d9c3c483e2d22b318ced Mon Sep 17 00:00:00 2001 From: sstefan1 Date: Sat, 11 Jul 2020 14:24:56 +0200 Subject: [PATCH 020/771] [Attributor][NFC] Add more debug output for deleted functions --- llvm/lib/Transforms/IPO/Attributor.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 6d7f08bfbe07c..7f252079e0532 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -1180,6 +1180,9 @@ ChangeStatus 
Attributor::cleanupIR() { } } + LLVM_DEBUG(dbgs() << "[Attributor] DeadInsts size: " << DeadInsts.size() + << "\n"); + RecursivelyDeleteTriviallyDeadInstructions(DeadInsts); if (unsigned NumDeadBlocks = ToBeDeletedBlocks.size()) { @@ -1238,6 +1241,9 @@ ChangeStatus Attributor::cleanupIR() { NumFnDeleted += ToBeDeletedFunctions.size(); + LLVM_DEBUG(dbgs() << "[Attributor] Deleted " << NumFnDeleted + << " functions after manifest.\n"); + #ifdef EXPENSIVE_CHECKS for (Function *F : Functions) { if (ToBeDeletedFunctions.count(F)) From 0b4cf802fad4f504aefbeb70c061e60cff10d153 Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Fri, 10 Jul 2020 10:56:28 -0400 Subject: [PATCH 021/771] [fix-irreducible] Skip unreachable predecessors. Summary: - Skip unreachable predecessors during header detection in SCC. Those unreachable blocks would be generated in the switch lowering pass in the corner cases or other frontends. Even though they could be removed through the CFG simplification, we should skip them during header detection. Reviewers: sameerds Subscribers: hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D83562 --- llvm/lib/Transforms/Utils/FixIrreducible.cpp | 3 +++ .../Transforms/FixIrreducible/unreachable.ll | 24 +++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 llvm/test/Transforms/FixIrreducible/unreachable.ll diff --git a/llvm/lib/Transforms/Utils/FixIrreducible.cpp b/llvm/lib/Transforms/Utils/FixIrreducible.cpp index 510c033f64743..452463c9b6277 100644 --- a/llvm/lib/Transforms/Utils/FixIrreducible.cpp +++ b/llvm/lib/Transforms/Utils/FixIrreducible.cpp @@ -281,6 +281,9 @@ static bool makeReducible(LoopInfo &LI, DominatorTree &DT, Graph &&G) { LLVM_DEBUG(dbgs() << "Found headers:"); for (auto BB : reverse(Blocks)) { for (const auto P : predecessors(BB)) { + // Skip unreachable predecessors. 
+ if (!DT.isReachableFromEntry(P)) + continue; if (!Blocks.count(P)) { LLVM_DEBUG(dbgs() << " " << BB->getName()); Headers.insert(BB); diff --git a/llvm/test/Transforms/FixIrreducible/unreachable.ll b/llvm/test/Transforms/FixIrreducible/unreachable.ll new file mode 100644 index 0000000000000..71cd81e01953e --- /dev/null +++ b/llvm/test/Transforms/FixIrreducible/unreachable.ll @@ -0,0 +1,24 @@ +; RUN: opt %s -fix-irreducible -S -o - | FileCheck %s + +; CHECK-LABEL: @unreachable( +; CHECK: entry: +; CHECK-NOT: irr.guard: +define void @unreachable(i32 %n) { +entry: + br label %loop.body + +loop.body: + br label %inner.block + +unreachable.block: + br label %inner.block + +inner.block: + br i1 undef, label %loop.exit, label %loop.latch + +loop.latch: + br label %loop.body + +loop.exit: + ret void +} From 81db614411bdc8f95e5b7e2acaf551507eb7201b Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Sat, 11 Jul 2020 10:09:09 -0400 Subject: [PATCH 022/771] Fix `-Wunused-variable` warnings. NFC. --- llvm/lib/Transforms/IPO/OpenMPOpt.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp index f25e95466407d..2a3b2abf61762 100644 --- a/llvm/lib/Transforms/IPO/OpenMPOpt.cpp +++ b/llvm/lib/Transforms/IPO/OpenMPOpt.cpp @@ -1043,12 +1043,12 @@ bool OpenMPOpt::rewriteDeviceCodeStateMachine() { return; } - if (auto *Cmp = dyn_cast(U.getUser())) { + if (isa(U.getUser())) { ToBeReplacedStateMachineUses.push_back(&U); return; } - if (CallInst *CI = OpenMPOpt::getCallIfRegularCall( - *U.getUser(), &KernelPrepareParallelRFI)) { + if (OpenMPOpt::getCallIfRegularCall(*U.getUser(), + &KernelPrepareParallelRFI)) { ToBeReplacedStateMachineUses.push_back(&U); return; } From 102828249c8ec9ab43ee84f496274f2853ed899c Mon Sep 17 00:00:00 2001 From: Yash Jain Date: Sat, 11 Jul 2020 20:54:18 +0530 Subject: [PATCH 023/771] [MLIR] Parallelize affine.for op to 1-D affine.parallel op Introduce pass to 
convert parallel affine.for op into 1-D affine.parallel op. Run using --affine-parallelize. Removes test-detect-parallel: pass for checking parallel affine.for ops. Signed-off-by: Yash Jain Differential Revision: https://reviews.llvm.org/D83193 --- mlir/include/mlir/Dialect/Affine/Passes.h | 4 + mlir/include/mlir/Dialect/Affine/Passes.td | 5 + mlir/include/mlir/Dialect/Affine/Utils.h | 7 ++ .../Affine/Transforms/AffineParallelize.cpp | 50 ++++++++ .../Dialect/Affine/Transforms/CMakeLists.txt | 2 + mlir/lib/Dialect/Affine/Utils/Utils.cpp | 15 +++ .../Dialect/Affine/parallelism-detection.mlir | 47 ------- mlir/test/Dialect/Affine/parallelize.mlir | 118 ++++++++++++++++++ mlir/test/lib/Dialect/Affine/CMakeLists.txt | 1 - .../Affine/TestParallelismDetection.cpp | 47 ------- mlir/tools/mlir-opt/mlir-opt.cpp | 2 - 11 files changed, 201 insertions(+), 97 deletions(-) create mode 100644 mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp delete mode 100644 mlir/test/Dialect/Affine/parallelism-detection.mlir create mode 100644 mlir/test/Dialect/Affine/parallelize.mlir delete mode 100644 mlir/test/lib/Dialect/Affine/TestParallelismDetection.cpp diff --git a/mlir/include/mlir/Dialect/Affine/Passes.h b/mlir/include/mlir/Dialect/Affine/Passes.h index 0d7c3be240c99..18b3b790338d8 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.h +++ b/mlir/include/mlir/Dialect/Affine/Passes.h @@ -36,6 +36,10 @@ std::unique_ptr> createSimplifyAffineStructuresPass(); std::unique_ptr> createAffineLoopInvariantCodeMotionPass(); +/// Creates a pass to convert all parallel affine.for's into 1-d affine.parallel +/// ops. +std::unique_ptr> createAffineParallelizePass(); + /// Performs packing (or explicit copying) of accessed memref regions into /// buffers in the specified faster memory space through either pointwise copies /// or DMA operations. 
diff --git a/mlir/include/mlir/Dialect/Affine/Passes.td b/mlir/include/mlir/Dialect/Affine/Passes.td index 06e0920413a95..810640058155f 100644 --- a/mlir/include/mlir/Dialect/Affine/Passes.td +++ b/mlir/include/mlir/Dialect/Affine/Passes.td @@ -112,6 +112,11 @@ def AffineVectorize : FunctionPass<"affine-super-vectorize"> { ]; } +def AffineParallelize : FunctionPass<"affine-parallelize"> { + let summary = "Convert affine.for ops into 1-D affine.parallel"; + let constructor = "mlir::createAffineParallelizePass()"; +} + def SimplifyAffineStructures : FunctionPass<"simplify-affine-structures"> { let summary = "Simplify affine expressions in maps/sets and normalize " "memrefs"; diff --git a/mlir/include/mlir/Dialect/Affine/Utils.h b/mlir/include/mlir/Dialect/Affine/Utils.h index a2c0211b301e3..19df93f760f5e 100644 --- a/mlir/include/mlir/Dialect/Affine/Utils.h +++ b/mlir/include/mlir/Dialect/Affine/Utils.h @@ -15,9 +15,16 @@ namespace mlir { +class AffineForOp; class AffineIfOp; +class AffineParallelOp; struct LogicalResult; +/// Replaces parallel affine.for op with 1-d affine.parallel op. +/// mlir::isLoopParallel detect the parallel affine.for ops. +/// There is no cost model currently used to drive this parallelization. +void affineParallelize(AffineForOp forOp); + /// Hoists out affine.if/else to as high as possible, i.e., past all invariant /// affine.fors/parallel's. Returns success if any hoisting happened; folded` is /// set to true if the op was folded or erased. This hoisting could lead to diff --git a/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp new file mode 100644 index 0000000000000..b3651e2022458 --- /dev/null +++ b/mlir/lib/Dialect/Affine/Transforms/AffineParallelize.cpp @@ -0,0 +1,50 @@ +//===- AffineParallelize.cpp - Affineparallelize Pass---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements a parallelizer for affine loop nests that is able to +// perform inner or outer loop parallelization. +// +//===----------------------------------------------------------------------===// + +#include "PassDetail.h" +#include "mlir/Analysis/AffineStructures.h" +#include "mlir/Analysis/LoopAnalysis.h" +#include "mlir/Analysis/Utils.h" +#include "mlir/Dialect/Affine/IR/AffineOps.h" +#include "mlir/Dialect/Affine/IR/AffineValueMap.h" +#include "mlir/Dialect/Affine/Passes.h" +#include "mlir/Dialect/Affine/Passes.h.inc" +#include "mlir/Dialect/Affine/Utils.h" +#include "mlir/Transforms/LoopUtils.h" +#include "llvm/Support/Debug.h" + +#define DEBUG_TYPE "affine-parallel" + +using namespace mlir; + +namespace { +/// Convert all parallel affine.for op into 1-D affine.parallel op. 
+struct AffineParallelize : public AffineParallelizeBase { + void runOnFunction() override; +}; +} // namespace + +void AffineParallelize::runOnFunction() { + FuncOp f = getFunction(); + SmallVector parallelizableLoops; + f.walk([&](AffineForOp loop) { + if (isLoopParallel(loop)) + parallelizableLoops.push_back(loop); + }); + for (AffineForOp loop : parallelizableLoops) + affineParallelize(loop); +} + +std::unique_ptr> mlir::createAffineParallelizePass() { + return std::make_unique(); +} diff --git a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt index 0098c3e210914..dddcc93adf0d6 100644 --- a/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt +++ b/mlir/lib/Dialect/Affine/Transforms/CMakeLists.txt @@ -1,6 +1,7 @@ add_mlir_dialect_library(MLIRAffineTransforms AffineDataCopyGeneration.cpp AffineLoopInvariantCodeMotion.cpp + AffineParallelize.cpp LoopTiling.cpp LoopUnroll.cpp LoopUnrollAndJam.cpp @@ -17,6 +18,7 @@ add_mlir_dialect_library(MLIRAffineTransforms LINK_LIBS PUBLIC MLIRAffineOps + MLIRAffineUtils MLIREDSC MLIRIR MLIRPass diff --git a/mlir/lib/Dialect/Affine/Utils/Utils.cpp b/mlir/lib/Dialect/Affine/Utils/Utils.cpp index 57e6bce90844b..39e9cbc61e961 100644 --- a/mlir/lib/Dialect/Affine/Utils/Utils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/Utils.cpp @@ -129,6 +129,21 @@ static AffineIfOp hoistAffineIfOp(AffineIfOp ifOp, Operation *hoistOverOp) { return hoistedIfOp; } +/// Replace affine.for with a 1-d affine.parallel and clone the former's body +/// into the latter while remapping values. +void mlir::affineParallelize(AffineForOp forOp) { + Location loc = forOp.getLoc(); + OpBuilder outsideBuilder(forOp); + // Creating empty 1-D affine.parallel op. + AffineParallelOp newPloop = outsideBuilder.create( + loc, llvm::None, llvm::None, forOp.getLowerBoundMap(), + forOp.getLowerBoundOperands(), forOp.getUpperBoundMap(), + forOp.getUpperBoundOperands()); + // Steal the body of the old affine for op and erase it. 
+ newPloop.region().takeBody(forOp.region()); + forOp.erase(); +} + // Returns success if any hoisting happened. LogicalResult mlir::hoistAffineIfOp(AffineIfOp ifOp, bool *folded) { // Bail out early if the ifOp returns a result. TODO: Consider how to diff --git a/mlir/test/Dialect/Affine/parallelism-detection.mlir b/mlir/test/Dialect/Affine/parallelism-detection.mlir deleted file mode 100644 index 0788e6f8fb208..0000000000000 --- a/mlir/test/Dialect/Affine/parallelism-detection.mlir +++ /dev/null @@ -1,47 +0,0 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -test-detect-parallel -split-input-file -verify-diagnostics | FileCheck %s - -// CHECK-LABEL: func @loop_nest_3d_outer_two_parallel -func @loop_nest_3d_outer_two_parallel(%N : index) { - %0 = alloc() : memref<1024 x 1024 x vector<64xf32>> - %1 = alloc() : memref<1024 x 1024 x vector<64xf32>> - %2 = alloc() : memref<1024 x 1024 x vector<64xf32>> - affine.for %i = 0 to %N { - // expected-remark@-1 {{parallel loop}} - affine.for %j = 0 to %N { - // expected-remark@-1 {{parallel loop}} - affine.for %k = 0 to %N { - // expected-remark@-1 {{sequential loop}} - %5 = affine.load %0[%i, %k] : memref<1024x1024xvector<64xf32>> - %6 = affine.load %1[%k, %j] : memref<1024x1024xvector<64xf32>> - %7 = affine.load %2[%i, %j] : memref<1024x1024xvector<64xf32>> - %8 = mulf %5, %6 : vector<64xf32> - %9 = addf %7, %8 : vector<64xf32> - affine.store %9, %2[%i, %j] : memref<1024x1024xvector<64xf32>> - } - } - } - return -} - -// ----- - -// CHECK-LABEL: unknown_op_conservative -func @unknown_op_conservative() { - affine.for %i = 0 to 10 { - // expected-remark@-1 {{sequential loop}} - "unknown"() : () -> () - } - return -} - -// ----- - -// CHECK-LABEL: non_affine_load -func @non_affine_load() { - %0 = alloc() : memref<100 x f32> - affine.for %i = 0 to 100 { - // expected-remark@-1 {{sequential loop}} - load %0[%i] : memref<100 x f32> - } - return -} diff --git a/mlir/test/Dialect/Affine/parallelize.mlir 
b/mlir/test/Dialect/Affine/parallelize.mlir new file mode 100644 index 0000000000000..5287628185c5b --- /dev/null +++ b/mlir/test/Dialect/Affine/parallelize.mlir @@ -0,0 +1,118 @@ +// RUN: mlir-opt %s -allow-unregistered-dialect -affine-parallelize| FileCheck %s + +// For multiple nested for-loops. +// CHECK-DAG: [[MAP5:#map[0-9]+]] = affine_map<(d0, d1, d2, d3, d4, d5, d6, d7) -> (d0 + d1, d2 * 2 + d3, d4 * 2 + d5, d6 + d7)> +// CHECK-LABEL: func @reduce_window_max() { +func @reduce_window_max() { + %cst = constant 0.000000e+00 : f32 + %0 = alloc() : memref<1x8x8x64xf32> + %1 = alloc() : memref<1x18x18x64xf32> + affine.for %arg0 = 0 to 1 { + affine.for %arg1 = 0 to 8 { + affine.for %arg2 = 0 to 8 { + affine.for %arg3 = 0 to 64 { + affine.store %cst, %0[%arg0, %arg1, %arg2, %arg3] : memref<1x8x8x64xf32> + } + } + } + } + affine.for %arg0 = 0 to 1 { + affine.for %arg1 = 0 to 8 { + affine.for %arg2 = 0 to 8 { + affine.for %arg3 = 0 to 64 { + affine.for %arg4 = 0 to 1 { + affine.for %arg5 = 0 to 3 { + affine.for %arg6 = 0 to 3 { + affine.for %arg7 = 0 to 1 { + %2 = affine.load %0[%arg0, %arg1, %arg2, %arg3] : memref<1x8x8x64xf32> + %3 = affine.load %1[%arg0 + %arg4, %arg1 * 2 + %arg5, %arg2 * 2 + %arg6, %arg3 + %arg7] : memref<1x18x18x64xf32> + %4 = cmpf "ogt", %2, %3 : f32 + %5 = select %4, %2, %3 : f32 + affine.store %5, %0[%arg0, %arg1, %arg2, %arg3] : memref<1x8x8x64xf32> + } + } + } + } + } + } + } + } + return +} + +// CHECK: %[[cst:.*]] = constant 0.000000e+00 : f32 +// CHECK: %[[v0:.*]] = alloc() : memref<1x8x8x64xf32> +// CHECK: %[[v1:.*]] = alloc() : memref<1x18x18x64xf32> +// CHECK: affine.parallel (%[[arg0:.*]]) = (0) to (1) { +// CHECK: affine.parallel (%[[arg1:.*]]) = (0) to (8) { +// CHECK: affine.parallel (%[[arg2:.*]]) = (0) to (8) { +// CHECK: affine.parallel (%[[arg3:.*]]) = (0) to (64) { +// CHECK: affine.store %[[cst]], %[[v0]][%[[arg0]], %[[arg1]], %[[arg2]], %[[arg3]]] : memref<1x8x8x64xf32> +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } 
+// CHECK: affine.parallel (%[[a0:.*]]) = (0) to (1) { +// CHECK: affine.parallel (%[[a1:.*]]) = (0) to (8) { +// CHECK: affine.parallel (%[[a2:.*]]) = (0) to (8) { +// CHECK: affine.parallel (%[[a3:.*]]) = (0) to (64) { +// CHECK: affine.parallel (%[[a4:.*]]) = (0) to (1) { +// CHECK: affine.for %[[a5:.*]] = 0 to 3 { +// CHECK: affine.for %[[a6:.*]] = 0 to 3 { +// CHECK: affine.parallel (%[[a7:.*]]) = (0) to (1) { +// CHECK: %[[lhs:.*]] = affine.load %[[v0]][%[[a0]], %[[a1]], %[[a2]], %[[a3]]] : memref<1x8x8x64xf32> +// CHECK: %[[rhs:.*]] = affine.load %[[v1]][%[[a0]] + %[[a4]], %[[a1]] * 2 + %[[a5]], %[[a2]] * 2 + %[[a6]], %[[a3]] + %[[a7]]] : memref<1x18x18x64xf32> +// CHECK: %[[res:.*]] = cmpf "ogt", %[[lhs]], %[[rhs]] : f32 +// CHECK: %[[sel:.*]] = select %[[res]], %[[lhs]], %[[rhs]] : f32 +// CHECK: affine.store %[[sel]], %[[v0]][%[[a0]], %[[a1]], %[[a2]], %[[a3]]] : memref<1x8x8x64xf32> +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: } + +func @loop_nest_3d_outer_two_parallel(%N : index) { + %0 = alloc() : memref<1024 x 1024 x vector<64xf32>> + %1 = alloc() : memref<1024 x 1024 x vector<64xf32>> + %2 = alloc() : memref<1024 x 1024 x vector<64xf32>> + affine.for %i = 0 to %N { + affine.for %j = 0 to %N { + %7 = affine.load %2[%i, %j] : memref<1024x1024xvector<64xf32>> + affine.for %k = 0 to %N { + %5 = affine.load %0[%i, %k] : memref<1024x1024xvector<64xf32>> + %6 = affine.load %1[%k, %j] : memref<1024x1024xvector<64xf32>> + %8 = mulf %5, %6 : vector<64xf32> + %9 = addf %7, %8 : vector<64xf32> + affine.store %9, %2[%i, %j] : memref<1024x1024xvector<64xf32>> + } + } + } + return +} + +// CHECK: affine.parallel (%[[arg1:.*]]) = (0) to (symbol(%arg0)) { +// CHECK-NEXT: affine.parallel (%[[arg2:.*]]) = (0) to (symbol(%arg0)) { +// CHECK: affine.for %[[arg3:.*]] = 0 to %arg0 { + +// CHECK-LABEL: unknown_op_conservative +func @unknown_op_conservative() { + affine.for %i = 0 to 10 { +// CHECK: 
affine.for %[[arg1:.*]] = 0 to 10 { + "unknown"() : () -> () + } + return +} + +// CHECK-LABEL: non_affine_load +func @non_affine_load() { + %0 = alloc() : memref<100 x f32> + affine.for %i = 0 to 100 { +// CHECK: affine.for %{{.*}} = 0 to 100 { + load %0[%i] : memref<100 x f32> + } + return +} diff --git a/mlir/test/lib/Dialect/Affine/CMakeLists.txt b/mlir/test/lib/Dialect/Affine/CMakeLists.txt index 68a0b06e0e318..3d08fed788e2e 100644 --- a/mlir/test/lib/Dialect/Affine/CMakeLists.txt +++ b/mlir/test/lib/Dialect/Affine/CMakeLists.txt @@ -3,7 +3,6 @@ add_mlir_library(MLIRAffineTransformsTestPasses TestAffineDataCopy.cpp TestAffineLoopUnswitching.cpp TestLoopPermutation.cpp - TestParallelismDetection.cpp TestVectorizationUtils.cpp EXCLUDE_FROM_LIBMLIR diff --git a/mlir/test/lib/Dialect/Affine/TestParallelismDetection.cpp b/mlir/test/lib/Dialect/Affine/TestParallelismDetection.cpp deleted file mode 100644 index b19e260316939..0000000000000 --- a/mlir/test/lib/Dialect/Affine/TestParallelismDetection.cpp +++ /dev/null @@ -1,47 +0,0 @@ -//===- ParallelismDetection.cpp - Parallelism Detection pass ------------*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements a pass to detect parallel affine 'affine.for' ops. -// -//===----------------------------------------------------------------------===// - -#include "mlir/Analysis/Utils.h" -#include "mlir/Dialect/Affine/IR/AffineOps.h" -#include "mlir/IR/Builders.h" -#include "mlir/Pass/Pass.h" - -using namespace mlir; - -namespace { - -struct TestParallelismDetection - : public PassWrapper { - void runOnFunction() override; -}; - -} // end anonymous namespace - -// Walks the function and emits a note for all 'affine.for' ops detected as -// parallel. 
-void TestParallelismDetection::runOnFunction() { - FuncOp f = getFunction(); - OpBuilder b(f.getBody()); - f.walk([&](AffineForOp forOp) { - if (isLoopParallel(forOp)) - forOp.emitRemark("parallel loop"); - else - forOp.emitRemark("sequential loop"); - }); -} - -namespace mlir { -void registerTestParallelismDetection() { - PassRegistration pass( - "test-detect-parallel", "Test parallelism detection "); -} -} // namespace mlir diff --git a/mlir/tools/mlir-opt/mlir-opt.cpp b/mlir/tools/mlir-opt/mlir-opt.cpp index 0ce5fac1223c6..f749c7ad98adf 100644 --- a/mlir/tools/mlir-opt/mlir-opt.cpp +++ b/mlir/tools/mlir-opt/mlir-opt.cpp @@ -62,7 +62,6 @@ void registerTestMatchers(); void registerTestMemRefDependenceCheck(); void registerTestMemRefStrideCalculation(); void registerTestOpaqueLoc(); -void registerTestParallelismDetection(); void registerTestPreparationPassWithAllowedMemrefResults(); void registerTestReducer(); void registerTestGpuParallelLoopMappingPass(); @@ -138,7 +137,6 @@ void registerTestPasses() { registerTestMemRefDependenceCheck(); registerTestMemRefStrideCalculation(); registerTestOpaqueLoc(); - registerTestParallelismDetection(); registerTestPreparationPassWithAllowedMemrefResults(); registerTestReducer(); registerTestGpuParallelLoopMappingPass(); From 6e42a417bacbfd5a1f58b0ccb7c9b34ff9e54523 Mon Sep 17 00:00:00 2001 From: Valentin Clement Date: Sat, 11 Jul 2020 12:42:05 -0400 Subject: [PATCH 024/771] [flang][openmp] Check clauses allowed semantic with tablegen generated map Summary: This patch is enabling the generation of clauses enum sets for semantics check in Flang through tablegen. Enum sets and directive - sets map is generated by the new tablegen infrsatructure for OpenMP and other directive languages. The semantic checks for OpenMP are modified to use this newly generated map. 
Reviewers: DavidTruby, sscalpone, kiranchandramohan, ichoyjx, jdoerfert Reviewed By: DavidTruby, ichoyjx Subscribers: mgorny, yaxunl, hiraditya, guansong, sstefan1, aaron.ballman, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D83326 --- flang/lib/Semantics/check-omp-structure.cpp | 494 +---------------- flang/lib/Semantics/check-omp-structure.h | 21 +- .../test/Semantics/omp-clause-validity01.f90 | 1 - .../llvm/Frontend/Directive/DirectiveBase.td | 6 + .../llvm/Frontend/OpenMP/CMakeLists.txt | 1 + llvm/include/llvm/Frontend/OpenMP/OMP.td | 506 +++++++++++------- llvm/test/TableGen/directive1.td | 55 ++ llvm/test/TableGen/directive2.td | 54 +- llvm/utils/TableGen/DirectiveEmitter.cpp | 222 +++++++- llvm/utils/TableGen/TableGen.cpp | 10 +- llvm/utils/TableGen/TableGenBackends.h | 1 + 11 files changed, 685 insertions(+), 686 deletions(-) diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp index b4e86faffe195..a5f65bcbc8044 100644 --- a/flang/lib/Semantics/check-omp-structure.cpp +++ b/flang/lib/Semantics/check-omp-structure.cpp @@ -13,58 +13,6 @@ namespace Fortran::semantics { -static OmpClauseSet doAllowedClauses{llvm::omp::Clause::OMPC_private, - llvm::omp::Clause::OMPC_firstprivate, llvm::omp::Clause::OMPC_lastprivate, - llvm::omp::Clause::OMPC_linear, llvm::omp::Clause::OMPC_reduction}; -static OmpClauseSet doAllowedOnceClauses{llvm::omp::Clause::OMPC_schedule, - llvm::omp::Clause::OMPC_collapse, llvm::omp::Clause::OMPC_ordered}; - -static OmpClauseSet simdAllowedClauses{llvm::omp::Clause::OMPC_linear, - llvm::omp::Clause::OMPC_aligned, llvm::omp::Clause::OMPC_private, - llvm::omp::Clause::OMPC_lastprivate, llvm::omp::Clause::OMPC_reduction}; -static OmpClauseSet simdAllowedOnceClauses{llvm::omp::Clause::OMPC_collapse, - llvm::omp::Clause::OMPC_safelen, llvm::omp::Clause::OMPC_simdlen}; - -static OmpClauseSet parallelAllowedClauses{llvm::omp::Clause::OMPC_default, - 
llvm::omp::Clause::OMPC_private, llvm::omp::Clause::OMPC_firstprivate, - llvm::omp::Clause::OMPC_shared, llvm::omp::Clause::OMPC_copyin, - llvm::omp::Clause::OMPC_reduction}; -static OmpClauseSet parallelAllowedOnceClauses{llvm::omp::Clause::OMPC_if, - llvm::omp::Clause::OMPC_num_threads, llvm::omp::Clause::OMPC_proc_bind}; - -static OmpClauseSet taskloopAllowedClauses{llvm::omp::Clause::OMPC_shared, - llvm::omp::Clause::OMPC_private, llvm::omp::Clause::OMPC_firstprivate, - llvm::omp::Clause::OMPC_lastprivate, llvm::omp::Clause::OMPC_default, - llvm::omp::Clause::OMPC_untied, llvm::omp::Clause::OMPC_mergeable, - llvm::omp::Clause::OMPC_nogroup}; -static OmpClauseSet taskloopAllowedOnceClauses{llvm::omp::Clause::OMPC_collapse, - llvm::omp::Clause::OMPC_if, llvm::omp::Clause::OMPC_final, - llvm::omp::Clause::OMPC_priority}; -static OmpClauseSet taskloopAllowedExclusiveClauses{ - llvm::omp::Clause::OMPC_grainsize, llvm::omp::Clause::OMPC_num_tasks}; - -static OmpClauseSet distributeAllowedClauses{llvm::omp::Clause::OMPC_private, - llvm::omp::Clause::OMPC_firstprivate, llvm::omp::Clause::OMPC_lastprivate}; -static OmpClauseSet distributeAllowedOnceClauses{ - llvm::omp::Clause::OMPC_collapse, llvm::omp::Clause::OMPC_dist_schedule}; - -static OmpClauseSet targetAllowedClauses{llvm::omp::Clause::OMPC_if, - llvm::omp::Clause::OMPC_private, llvm::omp::Clause::OMPC_firstprivate, - llvm::omp::Clause::OMPC_map, llvm::omp::Clause::OMPC_is_device_ptr, - llvm::omp::Clause::OMPC_depend}; -static OmpClauseSet targetAllowedOnceClauses{llvm::omp::Clause::OMPC_device, - llvm::omp::Clause::OMPC_defaultmap, llvm::omp::Clause::OMPC_nowait}; - -static OmpClauseSet teamsAllowedClauses{llvm::omp::Clause::OMPC_private, - llvm::omp::Clause::OMPC_firstprivate, llvm::omp::Clause::OMPC_shared, - llvm::omp::Clause::OMPC_reduction}; -static OmpClauseSet teamsAllowedOnceClauses{llvm::omp::Clause::OMPC_num_teams, - llvm::omp::Clause::OMPC_thread_limit, llvm::omp::Clause::OMPC_default}; - -static 
OmpClauseSet sectionsAllowedClauses{llvm::omp::Clause::OMPC_private, - llvm::omp::Clause::OMPC_firstprivate, llvm::omp::Clause::OMPC_lastprivate, - llvm::omp::Clause::OMPC_reduction}; - std::string OmpStructureChecker::ContextDirectiveAsFortran() { auto dir = llvm::omp::getOpenMPDirectiveName(GetContext().directive).str(); std::transform(dir.begin(), dir.end(), dir.begin(), @@ -186,19 +134,18 @@ void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) { CheckMatching(beginLoopDir, *endLoopDir); } - if (beginDir.v != llvm::omp::Directive::OMPD_do) - PushContext(beginDir.source, beginDir.v); + if (beginDir.v != llvm::omp::Directive::OMPD_do) { + PushContextAndClauseSets(beginDir.source, beginDir.v); + } else { + // 2.7.1 do-clause -> private-clause | + // firstprivate-clause | + // lastprivate-clause | + // linear-clause | + // reduction-clause | + // schedule-clause | + // collapse-clause | + // ordered-clause - switch (beginDir.v) { - // 2.7.1 do-clause -> private-clause | - // firstprivate-clause | - // lastprivate-clause | - // linear-clause | - // reduction-clause | - // schedule-clause | - // collapse-clause | - // ordered-clause - case llvm::omp::Directive::OMPD_do: { // nesting check HasInvalidWorksharingNesting(beginDir.source, {llvm::omp::Directive::OMPD_do, llvm::omp::Directive::OMPD_sections, @@ -210,218 +157,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPLoopConstruct &x) { llvm::omp::Directive::OMPD_ordered, llvm::omp::Directive::OMPD_atomic, llvm::omp::Directive::OMPD_master}); - PushContext(beginDir.source, llvm::omp::Directive::OMPD_do); - SetContextAllowed(doAllowedClauses); - SetContextAllowedOnce(doAllowedOnceClauses); - } break; - - // 2.11.1 parallel-do-clause -> parallel-clause | - // do-clause - case llvm::omp::Directive::OMPD_parallel_do: { - SetContextAllowed(parallelAllowedClauses | doAllowedClauses); - SetContextAllowedOnce(parallelAllowedOnceClauses | doAllowedOnceClauses); - } break; - - // 2.8.1 simd-clause -> 
safelen-clause | - // simdlen-clause | - // linear-clause | - // aligned-clause | - // private-clause | - // lastprivate-clause | - // reduction-clause | - // collapse-clause - case llvm::omp::Directive::OMPD_simd: { - SetContextAllowed(simdAllowedClauses); - SetContextAllowedOnce(simdAllowedOnceClauses); - } break; - - // 2.8.3 do-simd-clause -> do-clause | - // simd-clause - case llvm::omp::Directive::OMPD_do_simd: { - SetContextAllowed(doAllowedClauses | simdAllowedClauses); - SetContextAllowedOnce(doAllowedOnceClauses | simdAllowedOnceClauses); - } break; - - // 2.11.4 parallel-do-simd-clause -> parallel-clause | - // do-simd-clause - case llvm::omp::Directive::OMPD_parallel_do_simd: { - SetContextAllowed( - parallelAllowedClauses | doAllowedClauses | simdAllowedClauses); - SetContextAllowedOnce(parallelAllowedOnceClauses | doAllowedOnceClauses | - simdAllowedOnceClauses); - } break; - - // 2.9.2 taskloop-clause -> if-clause | - // shared-clause | - // private-clause | - // firstprivate-clause | - // lastprivate-clause | - // default-clause | - // grainsize-clause | - // num-tasks-clause | - // collapse-clause | - // final-clause | - // priority-clause | - // untied-clause | - // mergeable-clause | - // nogroup-clause - case llvm::omp::Directive::OMPD_taskloop: { - SetContextAllowed(taskloopAllowedClauses); - SetContextAllowedOnce(taskloopAllowedOnceClauses); - SetContextAllowedExclusive(taskloopAllowedExclusiveClauses); - } break; - - // 2.9.3 taskloop-simd-clause -> taskloop-clause | - // simd-clause - case llvm::omp::Directive::OMPD_taskloop_simd: { - SetContextAllowed((taskloopAllowedClauses | simdAllowedClauses) - - llvm::omp::Clause::OMPC_reduction); - SetContextAllowedOnce(taskloopAllowedOnceClauses | simdAllowedOnceClauses); - SetContextAllowedExclusive(taskloopAllowedExclusiveClauses); - } break; - - // 2.10.8 distribute-clause -> private-clause | - // firstprivate-clause | - // lastprivate-clause | - // collapse-clause | - // dist-schedule-clause - 
case llvm::omp::Directive::OMPD_distribute: { - SetContextAllowed(distributeAllowedClauses); - SetContextAllowedOnce(distributeAllowedOnceClauses); - } break; - - // 2.10.9 distribute-simd-clause -> distribute-clause | - // simd-clause - case llvm::omp::Directive::OMPD_distribute_simd: { - SetContextAllowed(distributeAllowedClauses | simdAllowedClauses); - SetContextAllowedOnce( - distributeAllowedOnceClauses | simdAllowedOnceClauses); - } break; - - // 2.10.10 distribute-parallel-do-clause -> distribute-clause | - // parallel-do-clause - case llvm::omp::Directive::OMPD_distribute_parallel_do: { - SetContextAllowed( - distributeAllowedClauses | parallelAllowedClauses | doAllowedClauses); - SetContextAllowedOnce(distributeAllowedOnceClauses | - parallelAllowedOnceClauses | doAllowedOnceClauses); - } break; - - // 2.10.11 distribute-parallel-do-simd-clause -> distribute-clause | - // parallel-do-simd-clause - case llvm::omp::Directive::OMPD_distribute_parallel_do_simd: { - SetContextAllowed(distributeAllowedClauses | parallelAllowedClauses | - doAllowedClauses | simdAllowedClauses); - SetContextAllowedOnce(distributeAllowedOnceClauses | - parallelAllowedOnceClauses | doAllowedOnceClauses | simdAllowedClauses); - } break; - - // 2.11.6 target-parallel-do-clause -> target-clause | - // parallel-do-clause - case llvm::omp::Directive::OMPD_target_parallel_do: { - SetContextAllowed( - targetAllowedClauses | parallelAllowedClauses | doAllowedClauses); - SetContextAllowedOnce( - (targetAllowedOnceClauses | parallelAllowedOnceClauses | - doAllowedOnceClauses) - - llvm::omp::Clause::OMPC_nowait); - } break; - - // 2.11.7 target-parallel-do-simd-clause -> target-clause | - // parallel-do-simd-clause - case llvm::omp::Directive::OMPD_target_parallel_do_simd: { - SetContextAllowed(targetAllowedClauses | parallelAllowedClauses | - doAllowedClauses | simdAllowedClauses); - SetContextAllowedOnce( - (targetAllowedOnceClauses | parallelAllowedOnceClauses | - doAllowedOnceClauses | 
simdAllowedOnceClauses) - - llvm::omp::Clause::OMPC_nowait); - } break; - - // 2.11.8 target-simd-clause -> target-clause | - // simd-clause - case llvm::omp::Directive::OMPD_target_simd: { - SetContextAllowed(targetAllowedClauses | simdAllowedClauses); - SetContextAllowedOnce(targetAllowedOnceClauses | simdAllowedOnceClauses); - } break; - - // 2.11.10 teams-distribute-clause -> teams-clause | - // distribute-clause - case llvm::omp::Directive::OMPD_teams_distribute: { - SetContextAllowed(teamsAllowedClauses | distributeAllowedClauses); - SetContextAllowedOnce( - teamsAllowedOnceClauses | distributeAllowedOnceClauses); - } break; - - // 2.11.11 teams-distribute-simd-clause -> teams-clause | - // distribute-simd-clause - case llvm::omp::Directive::OMPD_teams_distribute_simd: { - SetContextAllowed( - teamsAllowedClauses | distributeAllowedClauses | simdAllowedClauses); - SetContextAllowedOnce(teamsAllowedOnceClauses | - distributeAllowedOnceClauses | simdAllowedOnceClauses); - } break; - - // 2.11.12 target-teams-distribute-clause -> target-clause | - // teams-distribute-clause - case llvm::omp::Directive::OMPD_target_teams_distribute: { - SetContextAllowed( - targetAllowedClauses | teamsAllowedClauses | distributeAllowedClauses); - SetContextAllowedOnce(targetAllowedOnceClauses | teamsAllowedOnceClauses | - distributeAllowedOnceClauses); - } break; - - // 2.11.13 target-teams-distribute-simd-clause -> target-clause | - // teams-distribute-simd-clause - case llvm::omp::Directive::OMPD_target_teams_distribute_simd: { - SetContextAllowed(targetAllowedClauses | teamsAllowedClauses | - distributeAllowedClauses | simdAllowedClauses); - SetContextAllowed(targetAllowedOnceClauses | teamsAllowedOnceClauses | - distributeAllowedOnceClauses | simdAllowedOnceClauses); - } break; - - // 2.11.14 teams-distribute-parallel-do-clause -> teams-clause | - // distribute-parallel-do-clause - case llvm::omp::Directive::OMPD_teams_distribute_parallel_do: { - 
SetContextAllowed(teamsAllowedClauses | distributeAllowedClauses | - parallelAllowedClauses | doAllowedClauses); - SetContextAllowedOnce(teamsAllowedOnceClauses | - distributeAllowedOnceClauses | parallelAllowedOnceClauses | - doAllowedOnceClauses); - } break; - - // 2.11.15 target-teams-distribute-parallel-do-clause -> target-clause | - // teams-distribute-parallel-do-clause - case llvm::omp::Directive::OMPD_target_teams_distribute_parallel_do: { - SetContextAllowed(targetAllowedClauses | teamsAllowedClauses | - distributeAllowedClauses | parallelAllowedClauses | doAllowedClauses); - SetContextAllowedOnce(targetAllowedOnceClauses | teamsAllowedOnceClauses | - distributeAllowedOnceClauses | parallelAllowedOnceClauses | - doAllowedOnceClauses); - } break; - - // 2.11.16 teams-distribute-parallel-do-clause -> teams-clause | - // distribute-parallel-do-simd-clause - case llvm::omp::Directive::OMPD_teams_distribute_parallel_do_simd: { - SetContextAllowed(teamsAllowedClauses | distributeAllowedClauses | - parallelAllowedClauses | doAllowedClauses | simdAllowedClauses); - SetContextAllowedOnce(teamsAllowedOnceClauses | - distributeAllowedOnceClauses | parallelAllowedOnceClauses | - doAllowedOnceClauses | simdAllowedOnceClauses); - } break; - - case llvm::omp::Directive::OMPD_target_teams_distribute_parallel_do_simd: { - SetContextAllowed(targetAllowedClauses | teamsAllowedClauses | - distributeAllowedClauses | parallelAllowedClauses | doAllowedClauses | - simdAllowedClauses); - SetContextAllowedOnce(targetAllowedOnceClauses | teamsAllowedOnceClauses | - distributeAllowedOnceClauses | parallelAllowedOnceClauses | - doAllowedOnceClauses | simdAllowedOnceClauses); - } break; - - default: - // TODO others - break; + PushContextAndClauseSets(beginDir.source, llvm::omp::Directive::OMPD_do); } } @@ -436,12 +172,8 @@ void OmpStructureChecker::Enter(const parser::OmpEndLoopDirective &x) { // 2.7.1 end-do -> END DO [nowait-clause] // 2.8.3 end-do-simd -> END DO SIMD 
[nowait-clause] case llvm::omp::Directive::OMPD_do: - SetContextDirectiveEnum(llvm::omp::Directive::OMPD_end_do); - SetContextAllowed(OmpClauseSet{llvm::omp::Clause::OMPC_nowait}); - break; case llvm::omp::Directive::OMPD_do_simd: - SetContextDirectiveEnum(llvm::omp::Directive::OMPD_end_do_simd); - SetContextAllowed(OmpClauseSet{llvm::omp::Clause::OMPC_nowait}); + SetClauseSets(dir.v); break; default: // no clauses are allowed @@ -455,112 +187,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPBlockConstruct &x) { const auto &beginDir{ CheckMatching(beginBlockDir, endBlockDir)}; - PushContext(beginDir.source, beginDir.v); - switch (beginDir.v) { - // 2.5 parallel-clause -> if-clause | - // num-threads-clause | - // default-clause | - // private-clause | - // firstprivate-clause | - // shared-clause | - // copyin-clause | - // reduction-clause | - // proc-bind-clause - case llvm::omp::Directive::OMPD_parallel: { - // reserve for nesting check - SetContextAllowed(parallelAllowedClauses); - SetContextAllowedOnce(parallelAllowedOnceClauses); - } break; - // 2.7.3 single-clause -> private-clause | - // firstprivate-clause - case llvm::omp::Directive::OMPD_single: - SetContextAllowed({llvm::omp::Clause::OMPC_private, - llvm::omp::Clause::OMPC_firstprivate}); - break; - // 2.7.4 workshare (no clauses are allowed) - case llvm::omp::Directive::OMPD_workshare: - break; - // 2.11.3 parallel-workshare-clause -> parallel-clause - case llvm::omp::Directive::OMPD_parallel_workshare: { - SetContextAllowed(parallelAllowedClauses); - SetContextAllowedOnce(parallelAllowedOnceClauses); - } break; - // 2.9.1 task-clause -> if-clause | - // final-clause | - // untied-clause | - // default-clause | - // mergeable-clause | - // private-clause | - // firstprivate-clause | - // shared-clause | - // depend-clause | - // priority-clause - case llvm::omp::Directive::OMPD_task: { - OmpClauseSet allowed{llvm::omp::Clause::OMPC_untied, - llvm::omp::Clause::OMPC_default, 
llvm::omp::Clause::OMPC_mergeable, - llvm::omp::Clause::OMPC_private, llvm::omp::Clause::OMPC_firstprivate, - llvm::omp::Clause::OMPC_shared, llvm::omp::Clause::OMPC_depend}; - SetContextAllowed(allowed); - OmpClauseSet allowedOnce{llvm::omp::Clause::OMPC_if, - llvm::omp::Clause::OMPC_final, llvm::omp::Clause::OMPC_priority}; - SetContextAllowedOnce(allowedOnce); - } break; - // 2.10.4 target-clause -> if-clause | - // device-clause | - // private-clause | - // firstprivate-clause | - // map-clause | - // is-device-ptr-clause | - // defaultmap-clause | - // nowait-clause | - // depend-clause - case llvm::omp::Directive::OMPD_target: { - SetContextAllowed(targetAllowedClauses); - SetContextAllowedOnce(targetAllowedOnceClauses); - } break; - // 2.10.7 teams-clause -> num-teams-clause | - // thread-limit-clause | - // default-clause | - // private-clause | - // firstprivate-clause | - // shared-clause | - // reduction-clause - case llvm::omp::Directive::OMPD_teams: { - SetContextAllowed(teamsAllowedClauses); - SetContextAllowedOnce(teamsAllowedOnceClauses); - } break; - // 2.11.9 target-teams -> target-clause | - // teams-clause - case llvm::omp::Directive::OMPD_target_teams: { - SetContextAllowed(targetAllowedClauses | teamsAllowedClauses); - SetContextAllowedOnce(targetAllowedOnceClauses | teamsAllowedOnceClauses); - } break; - // 2.10.1 target-data-clause -> if-clause | - // device-clause | - // map-clause | - // use-device-ptr-clause - case llvm::omp::Directive::OMPD_target_data: { - OmpClauseSet allowed{llvm::omp::Clause::OMPC_if, - llvm::omp::Clause::OMPC_map, llvm::omp::Clause::OMPC_use_device_ptr}; - SetContextAllowed(allowed); - SetContextAllowedOnce({llvm::omp::Clause::OMPC_device}); - SetContextRequired({llvm::omp::Clause::OMPC_map}); - } break; - // 2.13.1 master (no clauses are allowed) - case llvm::omp::Directive::OMPD_master: - break; - // 2.11.5 target-parallel-clause -> target-clause | - // parallel-clause - case 
llvm::omp::Directive::OMPD_target_parallel: { - SetContextAllowed((targetAllowedClauses | parallelAllowedClauses) - - llvm::omp::Clause::OMPC_copyin); - SetContextAllowedOnce( - targetAllowedOnceClauses | parallelAllowedOnceClauses); - } break; - default: - // TODO others - break; - } + PushContextAndClauseSets(beginDir.source, beginDir.v); } void OmpStructureChecker::Leave(const parser::OpenMPBlockConstruct &) { @@ -574,25 +201,7 @@ void OmpStructureChecker::Enter(const parser::OpenMPSectionsConstruct &x) { const auto &beginDir{CheckMatching( beginSectionsDir, endSectionsDir)}; - PushContext(beginDir.source, beginDir.v); - switch (beginDir.v) { - // 2.7.2 sections-clause -> private-clause | - // firstprivate-clause | - // lastprivate-clause | - // reduction-clause - case llvm::omp::Directive::OMPD_sections: { - SetContextAllowed(sectionsAllowedClauses); - } break; - // 2.11.2 -> parallel-sections-clause -> parallel-clause | - // sections-clause - case llvm::omp::Directive::OMPD_parallel_sections: { - SetContextAllowed(parallelAllowedClauses | sectionsAllowedClauses); - SetContextAllowedOnce(parallelAllowedOnceClauses); - } break; - default: - // TODO others - break; - } + PushContextAndClauseSets(beginDir.source, beginDir.v); } void OmpStructureChecker::Leave(const parser::OpenMPSectionsConstruct &) { @@ -616,19 +225,7 @@ void OmpStructureChecker::Enter(const parser::OmpEndSectionsDirective &x) { void OmpStructureChecker::Enter(const parser::OpenMPDeclareSimdConstruct &x) { const auto &dir{std::get(x.t)}; - PushContext(dir.source, llvm::omp::Directive::OMPD_declare_simd); - // 2.8.2 declare-simd-clause -> simdlen-clause | - // linear-clause | - // aligned-clause | - // uniform-clause | - // inbranch-clause | - // notinbranch-clause - OmpClauseSet allowed{llvm::omp::Clause::OMPC_linear, - llvm::omp::Clause::OMPC_aligned, llvm::omp::Clause::OMPC_uniform}; - SetContextAllowed(allowed); - SetContextAllowedOnce({llvm::omp::Clause::OMPC_simdlen}); - 
SetContextAllowedExclusive( - {llvm::omp::Clause::OMPC_inbranch, llvm::omp::Clause::OMPC_notinbranch}); + PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_declare_simd); } void OmpStructureChecker::Leave(const parser::OpenMPDeclareSimdConstruct &) { @@ -652,57 +249,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPDeclareTargetConstruct &) { void OmpStructureChecker::Enter( const parser::OpenMPSimpleStandaloneConstruct &x) { const auto &dir{std::get(x.t)}; - PushContext(dir.source, dir.v); - switch (dir.v) { - case llvm::omp::Directive::OMPD_barrier: { - // 2.13.3 barrier - } break; - case llvm::omp::Directive::OMPD_taskwait: { - // 2.13.4 taskwait - } break; - case llvm::omp::Directive::OMPD_taskyield: { - // 2.9.4 taskyield - } break; - case llvm::omp::Directive::OMPD_target_enter_data: { - // 2.10.2 target-enter-data-clause -> if-clause | - // device-clause | - // map-clause | - // depend-clause | - // nowait-clause - OmpClauseSet allowed{llvm::omp::Clause::OMPC_map, - llvm::omp::Clause::OMPC_depend, llvm::omp::Clause::OMPC_nowait}; - SetContextAllowed(allowed); - OmpClauseSet allowedOnce{ - llvm::omp::Clause::OMPC_device, llvm::omp::Clause::OMPC_if}; - SetContextAllowedOnce(allowedOnce); - SetContextRequired({llvm::omp::Clause::OMPC_map}); - } break; - case llvm::omp::Directive::OMPD_target_exit_data: { - // 2.10.3 target-enter-data-clause -> if-clause | - // device-clause | - // map-clause | - // depend-clause | - // nowait-clause - OmpClauseSet allowed{llvm::omp::Clause::OMPC_map, - llvm::omp::Clause::OMPC_depend, llvm::omp::Clause::OMPC_nowait}; - SetContextAllowed(allowed); - OmpClauseSet allowedOnce{ - llvm::omp::Clause::OMPC_device, llvm::omp::Clause::OMPC_if}; - SetContextAllowedOnce(allowedOnce); - SetContextRequired({llvm::omp::Clause::OMPC_map}); - } break; - case llvm::omp::Directive::OMPD_target_update: { - // 2.10.5 target-update - } break; - case llvm::omp::Directive::OMPD_ordered: { - // 2.13.8 ordered-construct-clause -> 
depend-clause - OmpClauseSet allowed{llvm::omp::Clause::OMPC_depend}; - SetContextAllowed(allowed); - } break; - default: - // TODO others - break; - } + PushContextAndClauseSets(dir.source, dir.v); } void OmpStructureChecker::Leave( @@ -712,7 +259,7 @@ void OmpStructureChecker::Leave( void OmpStructureChecker::Enter(const parser::OpenMPFlushConstruct &x) { const auto &dir{std::get(x.t)}; - PushContext(dir.source, llvm::omp::Directive::OMPD_flush); + PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_flush); } void OmpStructureChecker::Leave(const parser::OpenMPFlushConstruct &) { @@ -721,7 +268,7 @@ void OmpStructureChecker::Leave(const parser::OpenMPFlushConstruct &) { void OmpStructureChecker::Enter(const parser::OpenMPCancelConstruct &x) { const auto &dir{std::get(x.t)}; - PushContext(dir.source, llvm::omp::Directive::OMPD_cancel); + PushContextAndClauseSets(dir.source, llvm::omp::Directive::OMPD_cancel); } void OmpStructureChecker::Leave(const parser::OpenMPCancelConstruct &) { @@ -731,7 +278,8 @@ void OmpStructureChecker::Leave(const parser::OpenMPCancelConstruct &) { void OmpStructureChecker::Enter( const parser::OpenMPCancellationPointConstruct &x) { const auto &dir{std::get(x.t)}; - PushContext(dir.source, llvm::omp::Directive::OMPD_cancellation_point); + PushContextAndClauseSets( + dir.source, llvm::omp::Directive::OMPD_cancellation_point); } void OmpStructureChecker::Leave( diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h index 1585b0c861add..eff0eb4aa76be 100644 --- a/flang/lib/Semantics/check-omp-structure.h +++ b/flang/lib/Semantics/check-omp-structure.h @@ -25,6 +25,9 @@ using OmpDirectiveSet = Fortran::common::EnumSet; +#define GEN_FLANG_DIRECTIVE_CLAUSE_SETS +#include "llvm/Frontend/OpenMP/OMP.cpp.inc" + namespace llvm { namespace omp { static OmpDirectiveSet parallelSet{Directive::OMPD_distribute_parallel_do, @@ -151,6 +154,9 @@ class OmpStructureChecker : public virtual BaseChecker { 
void Enter(const parser::OmpScheduleClause &); private: +#define GEN_FLANG_DIRECTIVE_CLAUSE_MAP +#include "llvm/Frontend/OpenMP/OMP.cpp.inc" + struct OmpContext { OmpContext(parser::CharBlock source, llvm::omp::Directive d) : directiveSource{source}, directive{d} {} @@ -216,7 +222,20 @@ class OmpStructureChecker : public virtual BaseChecker { void PushContext(const parser::CharBlock &source, llvm::omp::Directive dir) { ompContext_.emplace_back(source, dir); } - + void SetClauseSets(llvm::omp::Directive dir) { + ompContext_.back().allowedClauses = directiveClausesTable[dir].allowed; + ompContext_.back().allowedOnceClauses = + directiveClausesTable[dir].allowedOnce; + ompContext_.back().allowedExclusiveClauses = + directiveClausesTable[dir].allowedExclusive; + ompContext_.back().requiredClauses = + directiveClausesTable[dir].requiredOneOf; + } + void PushContextAndClauseSets( + const parser::CharBlock &source, llvm::omp::Directive dir) { + PushContext(source, dir); + SetClauseSets(dir); + } void RequiresConstantPositiveParameter( const llvm::omp::Clause &clause, const parser::ScalarIntConstantExpr &i); void RequiresPositiveParameter( diff --git a/flang/test/Semantics/omp-clause-validity01.f90 b/flang/test/Semantics/omp-clause-validity01.f90 index e3f43dc5445e6..77e40e323e5f9 100644 --- a/flang/test/Semantics/omp-clause-validity01.f90 +++ b/flang/test/Semantics/omp-clause-validity01.f90 @@ -458,7 +458,6 @@ enddo !$omp end taskloop simd - !ERROR: REDUCTION clause is not allowed on the TASKLOOP SIMD directive !$omp taskloop simd reduction(+:a) do i = 1, N a = a + 3.14 diff --git a/llvm/include/llvm/Frontend/Directive/DirectiveBase.td b/llvm/include/llvm/Frontend/Directive/DirectiveBase.td index 785a520613b96..3c295a1d7c5f3 100644 --- a/llvm/include/llvm/Frontend/Directive/DirectiveBase.td +++ b/llvm/include/llvm/Frontend/Directive/DirectiveBase.td @@ -43,6 +43,9 @@ class DirectiveLanguage { // Header file included in the implementation code generated. 
Ususally the // output file of the declaration code generation. Can be left blank. string includeHeader = ""; + + // EnumSet class name used for clauses to generated the allowed clauses map. + string clauseEnumSetClass = ""; } // Information about a specific clause. @@ -92,6 +95,9 @@ class Directive { // List of clauses that are allowed to appear only once. list allowedOnceClauses = []; + // List of clauses that are allowed but mutually exclusive. + list allowedExclusiveClauses = []; + // List of clauses that are required. list requiredClauses = []; diff --git a/llvm/include/llvm/Frontend/OpenMP/CMakeLists.txt b/llvm/include/llvm/Frontend/OpenMP/CMakeLists.txt index 69f503675940d..3ff89888bfd64 100644 --- a/llvm/include/llvm/Frontend/OpenMP/CMakeLists.txt +++ b/llvm/include/llvm/Frontend/OpenMP/CMakeLists.txt @@ -1,3 +1,4 @@ set(LLVM_TARGET_DEFINITIONS OMP.td) tablegen(LLVM OMP.h.inc --gen-directive-decl) +tablegen(LLVM OMP.cpp.inc --gen-directive-gen) add_public_tablegen_target(omp_gen) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMP.td b/llvm/include/llvm/Frontend/OpenMP/OMP.td index bd81eeb011272..a565bdf90b3f6 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMP.td +++ b/llvm/include/llvm/Frontend/OpenMP/OMP.td @@ -24,6 +24,7 @@ def OpenMP : DirectiveLanguage { let makeEnumAvailableInNamespace = 1; let enableBitmaskEnumInNamespace = 1; let includeHeader = "llvm/Frontend/OpenMP/OMP.h.inc"; + let clauseEnumSetClass = "OmpClauseSet"; } //===----------------------------------------------------------------------===// @@ -201,10 +202,7 @@ def OMPC_Notinbranch : Clause<"notinbranch"> {} def OMP_ThreadPrivate : Directive<"threadprivate"> {} def OMP_Parallel : Directive<"parallel"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -212,11 +210,14 @@ def OMP_Parallel : Directive<"parallel"> { VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + 
VersionedClause, + VersionedClause, + VersionedClause, + ]; } def OMP_Task : Directive<"task"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -224,12 +225,16 @@ def OMP_Task : Directive<"task"> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; } def OMP_Simd : Directive<"simd"> { let allowedClauses = [ @@ -237,15 +242,17 @@ def OMP_Simd : Directive<"simd"> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; } def OMP_For : Directive<"for"> { let allowedClauses = [ @@ -273,7 +280,8 @@ def OMP_Do : Directive<"do"> { let allowedOnceClauses = [ VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause ]; } def OMP_Sections : Directive<"sections"> { @@ -345,30 +353,34 @@ def OMP_Atomic : Directive<"atomic"> { def OMP_Target : Directive<"target"> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; } def OMP_Teams : Directive<"teams"> { let allowedClauses = [ - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; } def OMP_Cancel : Directive<"cancel"> { let allowedClauses = [ @@ -386,50 +398,64 @@ 
def OMP_Requires : Directive<"requires"> { } def OMP_TargetData : Directive<"target data"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause + ]; + let requiredClauses = [ + VersionedClause + ]; } def OMP_TargetEnterData : Directive<"target enter data"> { let allowedClauses = [ + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; } def OMP_TargetExitData : Directive<"target exit data"> { let allowedClauses = [ - VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause, + VersionedClause + ]; + let requiredClauses = [ + VersionedClause ]; } def OMP_TargetParallel : Directive<"target parallel"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; } def OMP_TargetParallelFor : Directive<"target parallel for"> { let allowedClauses = [ @@ -459,27 +485,31 @@ def OMP_TargetParallelFor : Directive<"target parallel for"> { } def OMP_TargetParallelDo : Directive<"target parallel do"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, 
VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause ]; } def OMP_TargetUpdate : Directive<"target update"> { @@ -558,27 +588,29 @@ def OMP_ParallelForSimd : Directive<"parallel for simd"> { } def OMP_ParallelDoSimd : Directive<"parallel do simd"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; } def OMP_ParallelMaster : Directive<"parallel master"> { let allowedClauses = [ @@ -597,7 +629,6 @@ def OMP_ParallelMaster : Directive<"parallel master"> { def OMP_ParallelSections : Directive<"parallel sections"> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, @@ -608,6 +639,9 @@ def OMP_ParallelSections : Directive<"parallel sections"> { VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause + ]; } def OMP_ForSimd : Directive<"for simd"> { let allowedClauses = [ @@ -643,7 +677,8 @@ def OMP_DoSimd : Directive<"do simd"> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause ]; } def OMP_CancellationPoint : Directive<"cancellation point"> {} @@ -653,53 +688,74 @@ def OMP_DeclareMapper : Directive<"declare mapper"> { VersionedClause ]; } -def 
OMP_DeclareSimd : Directive<"declare simd"> {} +def OMP_DeclareSimd : Directive<"declare simd"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause + ]; + let allowedExclusiveClauses = [ + VersionedClause, + VersionedClause + ]; +} def OMP_TaskLoop : Directive<"taskloop"> { let allowedClauses = [ - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + ]; + let allowedExclusiveClauses = [ + VersionedClause, + VersionedClause + ]; } def OMP_TaskLoopSimd : Directive<"taskloop simd"> { let allowedClauses = [ - VersionedClause, - VersionedClause, - VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedExclusiveClauses = [ + VersionedClause, + VersionedClause ]; } def OMP_Distribute : Directive<"distribute"> { @@ -707,10 +763,12 @@ def OMP_Distribute : Directive<"distribute"> { VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause ]; + let 
allowedOnceClauses = [ + VersionedClause, + VersionedClause + ]; } def OMP_DeclareTarget : Directive<"declare target"> {} def OMP_EndDeclareTarget : Directive<"end declare target"> {} @@ -735,21 +793,25 @@ def OMP_DistributeParallelFor : Directive<"distribute parallel for"> { } def OMP_DistributeParallelDo : Directive<"distribute parallel do"> { let allowedClauses = [ + VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; } def OMP_DistributeParallelForSimd : Directive<"distribute parallel for simd"> { @@ -802,22 +864,31 @@ def OMP_DistributeParallelDoSimd : Directive<"distribute parallel do simd"> { } def OMP_DistributeSimd : Directive<"distribute simd"> { let allowedClauses = [ - VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause ]; } + def OMP_TargetParallelForSimd : Directive<"target parallel for simd"> { let allowedClauses = [ VersionedClause, @@ -880,27 +951,33 @@ def OMP_TargetParallelDoSimd : Directive<"target parallel do simd"> { } def OMP_TargetSimd : Directive<"target simd"> { let allowedClauses = [ - VersionedClause, - 
VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; } def OMP_TeamsDistribute : Directive<"teams distribute"> { let allowedClauses = [ @@ -919,26 +996,29 @@ def OMP_TeamsDistribute : Directive<"teams distribute"> { } def OMP_TeamsDistributeSimd : Directive<"teams distribute simd"> { let allowedClauses = [ - VersionedClause, - VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; } + def OMP_TeamsDistributeParallelForSimd : Directive<"teams distribute parallel for simd"> { let allowedClauses = [ @@ -968,27 +1048,29 @@ def OMP_TeamsDistributeParallelForSimd : def OMP_TeamsDistributeParallelDoSimd : Directive<"teams distribute parallel do simd"> { let allowedClauses = [ + VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + 
VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause, ]; } def OMP_TeamsDistributeParallelFor : @@ -1016,68 +1098,78 @@ def OMP_TeamsDistributeParallelFor : def OMP_TeamsDistributeParallelDo : Directive<"teams distribute parallel do"> { let allowedClauses = [ + VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; +let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; } def OMP_TargetTeams : Directive<"target teams"> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause + VersionedClause, + VersionedClause + ]; + + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause ]; } def OMP_TargetTeamsDistribute : Directive<"target teams distribute"> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, 
VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; } + def OMP_TargetTeamsDistributeParallelFor : Directive<"target teams distribute parallel for"> { let allowedClauses = [ @@ -1110,28 +1202,33 @@ def OMP_TargetTeamsDistributeParallelDo : Directive<"target teams distribute parallel do"> { let allowedClauses = [ VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause ]; } def OMP_TargetTeamsDistributeParallelForSimd : @@ -1170,63 +1267,69 @@ def OMP_TargetTeamsDistributeParallelForSimd : def OMP_TargetTeamsDistributeParallelDoSimd : Directive<"target teams distribute parallel do simd"> { let allowedClauses = [ - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + 
VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, - VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause + VersionedClause ]; } def OMP_TargetTeamsDistributeSimd : Directive<"target teams distribute simd"> { let allowedClauses = [ - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, VersionedClause, - VersionedClause, + VersionedClause, VersionedClause, - VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, VersionedClause, VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, VersionedClause, VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause, - VersionedClause ]; } def OMP_Allocate : Directive<"allocate"> { @@ -1359,7 +1462,22 @@ def OMP_Scan : Directive<"scan"> { } def OMP_BeginDeclareVariant : Directive<"begin declare variant"> {} def OMP_EndDeclareVariant : Directive<"end declare variant"> {} -def OMP_ParallelWorkshare : Directive<"parallel workshare"> {} +def OMP_ParallelWorkshare : Directive<"parallel workshare"> { + let allowedClauses = [ + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause, + VersionedClause + ]; + let allowedOnceClauses = [ + VersionedClause, + VersionedClause, + VersionedClause + ]; +} def OMP_Workshare : Directive<"workshare"> {} 
def OMP_EndDo : Directive<"end do"> {} def OMP_EndDoSimd : Directive<"end do simd"> {} diff --git a/llvm/test/TableGen/directive1.td b/llvm/test/TableGen/directive1.td index 8b3cc8702bd49..b293196d4d556 100644 --- a/llvm/test/TableGen/directive1.td +++ b/llvm/test/TableGen/directive1.td @@ -1,5 +1,6 @@ // RUN: llvm-tblgen -gen-directive-decl -I %p/../../include %s | FileCheck -match-full-lines %s // RUN: llvm-tblgen -gen-directive-impl -I %p/../../include %s | FileCheck -match-full-lines %s -check-prefix=IMPL +// RUN: llvm-tblgen -gen-directive-gen -I %p/../../include %s | FileCheck -match-full-lines %s -check-prefix=GEN include "llvm/Frontend/Directive/DirectiveBase.td" @@ -126,3 +127,57 @@ def TDL_DirA : Directive<"dira"> { // IMPL-NEXT: } // IMPL-NEXT: llvm_unreachable("Invalid Tdl Directive kind"); // IMPL-NEXT: } +// IMPL-EMPTY: + + + +// GEN: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS +// GEN-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_SETS +// GEN-EMPTY: +// GEN-NEXT: namespace llvm { +// GEN-NEXT: namespace tdl { +// GEN-EMPTY: +// GEN-NEXT: // Sets for dira +// GEN-EMPTY: +// GEN-NEXT: static allowedClauses_TDLD_dira { +// GEN-NEXT: llvm::tdl::Clause::TDLC_clausea, +// GEN-NEXT: llvm::tdl::Clause::TDLC_clauseb, +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: static allowedOnceClauses_TDLD_dira { +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: static allowedExclusiveClauses_TDLD_dira { +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: static requiredClauses_TDLD_dira { +// GEN-NEXT: }; +// GEN-NEXT: } // namespace tdl +// GEN-NEXT: } // namespace llvm +// GEN-EMPTY: +// GEN-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_SETS +// GEN-EMPTY: +// GEN-NEXT: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_MAP +// GEN-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_MAP +// GEN-EMPTY: +// GEN-NEXT: struct TdlDirectiveClauses { +// GEN-NEXT: const allowed; +// GEN-NEXT: const allowedOnce; +// GEN-NEXT: const allowedExclusive; +// GEN-NEXT: const requiredOneOf; +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: 
std::unordered_map +// GEN-NEXT: directiveClausesTable = { +// GEN-NEXT: {llvm::tdl::Directive::TDLD_dira, +// GEN-NEXT: { +// GEN-NEXT: llvm::tdl::allowedClauses_TDLD_dira, +// GEN-NEXT: llvm::tdl::allowedOnceClauses_TDLD_dira, +// GEN-NEXT: llvm::tdl::allowedExclusiveClauses_TDLD_dira, +// GEN-NEXT: llvm::tdl::requiredClauses_TDLD_dira, +// GEN-NEXT: } +// GEN-NEXT: }, +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_MAP + diff --git a/llvm/test/TableGen/directive2.td b/llvm/test/TableGen/directive2.td index 06c7aabcf3adc..517c79d457988 100644 --- a/llvm/test/TableGen/directive2.td +++ b/llvm/test/TableGen/directive2.td @@ -1,5 +1,6 @@ // RUN: llvm-tblgen -gen-directive-decl -I %p/../../include %s | FileCheck -match-full-lines %s // RUN: llvm-tblgen -gen-directive-impl -I %p/../../include %s | FileCheck -match-full-lines %s -check-prefix=IMPL +// RUN: llvm-tblgen -gen-directive-gen -I %p/../../include %s | FileCheck -match-full-lines %s -check-prefix=GEN include "llvm/Frontend/Directive/DirectiveBase.td" @@ -71,7 +72,7 @@ def TDL_DirA : Directive<"dira"> { // IMPL-NEXT: using namespace llvm; // IMPL-NEXT: using namespace tdl; // IMPL-EMPTY: -// IMPL: Directive llvm::tdl::getTdlDirectiveKind(llvm::StringRef Str) { +// IMPL-NEXT: Directive llvm::tdl::getTdlDirectiveKind(llvm::StringRef Str) { // IMPL-NEXT: return llvm::StringSwitch(Str) // IMPL-NEXT: .Case("dira",TDLD_dira) // IMPL-NEXT: .Default(TDLD_dira); @@ -119,3 +120,54 @@ def TDL_DirA : Directive<"dira"> { // IMPL-NEXT: } // IMPL-NEXT: llvm_unreachable("Invalid Tdl Directive kind"); // IMPL-NEXT: } + + +// GEN: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_SETS +// GEN-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_SETS +// GEN-EMPTY: +// GEN-NEXT: namespace llvm { +// GEN-NEXT: namespace tdl { +// GEN-EMPTY: +// GEN-NEXT: // Sets for dira +// GEN-EMPTY: +// GEN-NEXT: static allowedClauses_TDLD_dira { +// GEN-NEXT: llvm::tdl::Clause::TDLC_clausea, +// GEN-NEXT: llvm::tdl::Clause::TDLC_clauseb, 
+// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: static allowedOnceClauses_TDLD_dira { +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: static allowedExclusiveClauses_TDLD_dira { +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: static requiredClauses_TDLD_dira { +// GEN-NEXT: }; +// GEN-NEXT: } // namespace tdl +// GEN-NEXT: } // namespace llvm +// GEN-EMPTY: +// GEN-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_SETS +// GEN-EMPTY: +// GEN-NEXT: #ifdef GEN_FLANG_DIRECTIVE_CLAUSE_MAP +// GEN-NEXT: #undef GEN_FLANG_DIRECTIVE_CLAUSE_MAP +// GEN-EMPTY: +// GEN-NEXT: struct TdlDirectiveClauses { +// GEN-NEXT: const allowed; +// GEN-NEXT: const allowedOnce; +// GEN-NEXT: const allowedExclusive; +// GEN-NEXT: const requiredOneOf; +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: std::unordered_map +// GEN-NEXT: directiveClausesTable = { +// GEN-NEXT: {llvm::tdl::Directive::TDLD_dira, +// GEN-NEXT: { +// GEN-NEXT: llvm::tdl::allowedClauses_TDLD_dira, +// GEN-NEXT: llvm::tdl::allowedOnceClauses_TDLD_dira, +// GEN-NEXT: llvm::tdl::allowedExclusiveClauses_TDLD_dira, +// GEN-NEXT: llvm::tdl::requiredClauses_TDLD_dira, +// GEN-NEXT: } +// GEN-NEXT: }, +// GEN-NEXT: }; +// GEN-EMPTY: +// GEN-NEXT: #endif // GEN_FLANG_DIRECTIVE_CLAUSE_MAP diff --git a/llvm/utils/TableGen/DirectiveEmitter.cpp b/llvm/utils/TableGen/DirectiveEmitter.cpp index f51f98872bb51..fc4a6757f8086 100644 --- a/llvm/utils/TableGen/DirectiveEmitter.cpp +++ b/llvm/utils/TableGen/DirectiveEmitter.cpp @@ -14,12 +14,30 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringSet.h" #include "llvm/TableGen/Error.h" #include "llvm/TableGen/Record.h" #include "llvm/TableGen/TableGenBackend.h" using namespace llvm; +namespace { +// Simple RAII helper for defining ifdef-undef-endif scopes. 
+class IfDefScope { +public: + IfDefScope(StringRef Name, raw_ostream &OS) : Name(Name), OS(OS) { + OS << "#ifdef " << Name << "\n" + << "#undef " << Name << "\n"; + } + + ~IfDefScope() { OS << "\n#endif // " << Name << "\n\n"; } + +private: + StringRef Name; + raw_ostream &OS; +}; +} // end anonymous namespace + namespace llvm { // Get Directive or Clause name formatted by replacing whitespaces with @@ -205,16 +223,21 @@ void GenerateGetKind(const std::vector &Records, raw_ostream &OS, void GenerateCaseForVersionedClauses(const std::vector &Clauses, raw_ostream &OS, StringRef DirectiveName, StringRef DirectivePrefix, - StringRef ClausePrefix) { + StringRef ClausePrefix, + llvm::StringSet<> &Cases) { for (const auto &C : Clauses) { const auto MinVersion = C->getValueAsInt("minVersion"); const auto MaxVersion = C->getValueAsInt("maxVersion"); const auto SpecificClause = C->getValueAsDef("clause"); - const auto ClauseName = SpecificClause->getValueAsString("name"); - OS << " case " << ClausePrefix << getFormattedName(ClauseName) - << ":\n"; - OS << " return " << MinVersion << " <= Version && " << MaxVersion - << " >= Version;\n"; + const auto ClauseName = + getFormattedName(SpecificClause->getValueAsString("name")); + + if (Cases.find(ClauseName) == Cases.end()) { + Cases.insert(ClauseName); + OS << " case " << ClausePrefix << ClauseName << ":\n"; + OS << " return " << MinVersion << " <= Version && " << MaxVersion + << " >= Version;\n"; + } } } @@ -239,24 +262,32 @@ void GenerateIsAllowedClause(const std::vector &Directives, const auto &AllowedClauses = D->getValueAsListOfDefs("allowedClauses"); const auto &AllowedOnceClauses = D->getValueAsListOfDefs("allowedOnceClauses"); + const auto &AllowedExclusiveClauses = + D->getValueAsListOfDefs("allowedExclusiveClauses"); const auto &RequiredClauses = D->getValueAsListOfDefs("requiredClauses"); OS << " case " << DirectivePrefix << getFormattedName(DirectiveName) << ":\n"; - if (AllowedClauses.size() == 0 && 
AllowedOnceClauses.size() == 0 && - AllowedOnceClauses.size() == 0) { + if (AllowedClauses.size() == 0 && AllowedOnceClauses.size() == 0 && + AllowedExclusiveClauses.size() == 0 && RequiredClauses.size() == 0) { OS << " return false;\n"; } else { OS << " switch (C) {\n"; + llvm::StringSet<> Cases; + GenerateCaseForVersionedClauses(AllowedClauses, OS, DirectiveName, - DirectivePrefix, ClausePrefix); + DirectivePrefix, ClausePrefix, Cases); GenerateCaseForVersionedClauses(AllowedOnceClauses, OS, DirectiveName, - DirectivePrefix, ClausePrefix); + DirectivePrefix, ClausePrefix, Cases); + + GenerateCaseForVersionedClauses(AllowedExclusiveClauses, OS, + DirectiveName, DirectivePrefix, + ClausePrefix, Cases); GenerateCaseForVersionedClauses(RequiredClauses, OS, DirectiveName, - DirectivePrefix, ClausePrefix); + DirectivePrefix, ClausePrefix, Cases); OS << " default:\n"; OS << " return false;\n"; @@ -271,9 +302,143 @@ void GenerateIsAllowedClause(const std::vector &Directives, OS << "}\n"; // End of function isAllowedClauseForDirective } +// Generate a simple enum set with the give clauses. +void GenerateClauseSet(const std::vector &Clauses, raw_ostream &OS, + StringRef ClauseEnumSetClass, StringRef ClauseSetPrefix, + StringRef DirectiveName, StringRef DirectivePrefix, + StringRef ClausePrefix, StringRef CppNamespace) { + + OS << "\n"; + OS << " static " << ClauseEnumSetClass << " " << ClauseSetPrefix + << DirectivePrefix << getFormattedName(DirectiveName) << " {\n"; + + for (const auto &C : Clauses) { + const auto SpecificClause = C->getValueAsDef("clause"); + const auto ClauseName = SpecificClause->getValueAsString("name"); + OS << " llvm::" << CppNamespace << "::Clause::" << ClausePrefix + << getFormattedName(ClauseName) << ",\n"; + } + OS << " };\n"; +} + +// Generate an enum set for the 4 kinds of clauses linked to a directive. 
+void GenerateDirectiveClauseSets(const std::vector &Directives, + raw_ostream &OS, StringRef LanguageName, + StringRef ClauseEnumSetClass, + StringRef DirectivePrefix, + StringRef ClausePrefix, + StringRef CppNamespace) { + + IfDefScope Scope("GEN_FLANG_DIRECTIVE_CLAUSE_SETS", OS); + + OS << "\n"; + OS << "namespace llvm {\n"; + + // Open namespaces defined in the directive language. + llvm::SmallVector Namespaces; + llvm::SplitString(CppNamespace, Namespaces, "::"); + for (auto Ns : Namespaces) + OS << "namespace " << Ns << " {\n"; + + for (const auto &D : Directives) { + const auto DirectiveName = D->getValueAsString("name"); + + const auto &AllowedClauses = D->getValueAsListOfDefs("allowedClauses"); + const auto &AllowedOnceClauses = + D->getValueAsListOfDefs("allowedOnceClauses"); + const auto &AllowedExclusiveClauses = + D->getValueAsListOfDefs("allowedExclusiveClauses"); + const auto &RequiredClauses = D->getValueAsListOfDefs("requiredClauses"); + + OS << "\n"; + OS << " // Sets for " << DirectiveName << "\n"; + + GenerateClauseSet(AllowedClauses, OS, ClauseEnumSetClass, "allowedClauses_", + DirectiveName, DirectivePrefix, ClausePrefix, + CppNamespace); + GenerateClauseSet(AllowedOnceClauses, OS, ClauseEnumSetClass, + "allowedOnceClauses_", DirectiveName, DirectivePrefix, + ClausePrefix, CppNamespace); + GenerateClauseSet(AllowedExclusiveClauses, OS, ClauseEnumSetClass, + "allowedExclusiveClauses_", DirectiveName, + DirectivePrefix, ClausePrefix, CppNamespace); + GenerateClauseSet(RequiredClauses, OS, ClauseEnumSetClass, + "requiredClauses_", DirectiveName, DirectivePrefix, + ClausePrefix, CppNamespace); + } + + // Closing namespaces + for (auto Ns : llvm::reverse(Namespaces)) + OS << "} // namespace " << Ns << "\n"; + + OS << "} // namespace llvm\n"; +} + +// Generate a map of directive (key) with DirectiveClauses struct as values. 
+// The struct holds the 4 sets of enumeration for the 4 kinds of clauses +// allowances (allowed, allowed once, allowed exclusive and required). +void GenerateDirectiveClauseMap(const std::vector &Directives, + raw_ostream &OS, StringRef LanguageName, + StringRef ClauseEnumSetClass, + StringRef DirectivePrefix, + StringRef ClausePrefix, + StringRef CppNamespace) { + + IfDefScope Scope("GEN_FLANG_DIRECTIVE_CLAUSE_MAP", OS); + + OS << "\n"; + OS << "struct " << LanguageName << "DirectiveClauses {\n"; + OS << " const " << ClauseEnumSetClass << " allowed;\n"; + OS << " const " << ClauseEnumSetClass << " allowedOnce;\n"; + OS << " const " << ClauseEnumSetClass << " allowedExclusive;\n"; + OS << " const " << ClauseEnumSetClass << " requiredOneOf;\n"; + OS << "};\n"; + + OS << "\n"; + + OS << "std::unordered_map\n"; + OS << " directiveClausesTable = {\n"; + + for (const auto &D : Directives) { + const auto FormattedDirectiveName = + getFormattedName(D->getValueAsString("name")); + OS << " {llvm::" << CppNamespace << "::Directive::" << DirectivePrefix + << FormattedDirectiveName << ",\n"; + OS << " {\n"; + OS << " llvm::" << CppNamespace << "::allowedClauses_" + << DirectivePrefix << FormattedDirectiveName << ",\n"; + OS << " llvm::" << CppNamespace << "::allowedOnceClauses_" + << DirectivePrefix << FormattedDirectiveName << ",\n"; + OS << " llvm::" << CppNamespace << "::allowedExclusiveClauses_" + << DirectivePrefix << FormattedDirectiveName << ",\n"; + OS << " llvm::" << CppNamespace << "::requiredClauses_" + << DirectivePrefix << FormattedDirectiveName << ",\n"; + OS << " }\n"; + OS << " },\n"; + } + + OS << "};\n"; +} + // Generate the implemenation section for the enumeration in the directive // language -void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) { +void EmitDirectivesFlangImpl(const std::vector &Directives, + raw_ostream &OS, StringRef LanguageName, + StringRef ClauseEnumSetClass, + StringRef DirectivePrefix, StringRef ClausePrefix, + StringRef 
CppNamespace) { + + GenerateDirectiveClauseSets(Directives, OS, LanguageName, ClauseEnumSetClass, + DirectivePrefix, ClausePrefix, CppNamespace); + + GenerateDirectiveClauseMap(Directives, OS, LanguageName, ClauseEnumSetClass, + DirectivePrefix, ClausePrefix, CppNamespace); +} + +// Generate the implemenation section for the enumeration in the directive +// language. +void EmitDirectivesGen(RecordKeeper &Records, raw_ostream &OS) { const auto &DirectiveLanguages = Records.getAllDerivedDefinitions("DirectiveLanguage"); @@ -289,12 +454,40 @@ void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) { StringRef LanguageName = DirectiveLanguage->getValueAsString("name"); StringRef ClausePrefix = DirectiveLanguage->getValueAsString("clausePrefix"); StringRef CppNamespace = DirectiveLanguage->getValueAsString("cppNamespace"); - StringRef IncludeHeader = - DirectiveLanguage->getValueAsString("includeHeader"); + StringRef ClauseEnumSetClass = + DirectiveLanguage->getValueAsString("clauseEnumSetClass"); const auto &Directives = Records.getAllDerivedDefinitions("Directive"); const auto &Clauses = Records.getAllDerivedDefinitions("Clause"); + EmitDirectivesFlangImpl(Directives, OS, LanguageName, ClauseEnumSetClass, + DirectivePrefix, ClausePrefix, CppNamespace); +} + +// Generate the implemenation for the enumeration in the directive +// language. This code can be included in library. 
+void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) { + + const auto &DirectiveLanguages = + Records.getAllDerivedDefinitions("DirectiveLanguage"); + + if (DirectiveLanguages.size() != 1) { + PrintError("A single definition of DirectiveLanguage is needed."); + return; + } + + const auto &DirectiveLanguage = DirectiveLanguages[0]; + StringRef DirectivePrefix = + DirectiveLanguage->getValueAsString("directivePrefix"); + StringRef LanguageName = DirectiveLanguage->getValueAsString("name"); + StringRef ClausePrefix = DirectiveLanguage->getValueAsString("clausePrefix"); + StringRef CppNamespace = DirectiveLanguage->getValueAsString("cppNamespace"); + const auto &Directives = Records.getAllDerivedDefinitions("Directive"); + const auto &Clauses = Records.getAllDerivedDefinitions("Clause"); + + StringRef IncludeHeader = + DirectiveLanguage->getValueAsString("includeHeader"); + if (!IncludeHeader.empty()) OS << "#include \"" << IncludeHeader << "\"\n\n"; @@ -323,6 +516,7 @@ void EmitDirectivesImpl(RecordKeeper &Records, raw_ostream &OS) { GenerateGetName(Clauses, OS, "Clause", ClausePrefix, LanguageName, CppNamespace); + // isAllowedClauseForDirective(Directive D, Clause C, unsigned Version) GenerateIsAllowedClause(Directives, OS, LanguageName, DirectivePrefix, ClausePrefix, CppNamespace); } diff --git a/llvm/utils/TableGen/TableGen.cpp b/llvm/utils/TableGen/TableGen.cpp index 7438749a1243e..8015a58471cad 100644 --- a/llvm/utils/TableGen/TableGen.cpp +++ b/llvm/utils/TableGen/TableGen.cpp @@ -56,6 +56,7 @@ enum ActionType { GenAutomata, GenDirectivesEnumDecl, GenDirectivesEnumImpl, + GenDirectivesEnumGen, }; namespace llvm { @@ -132,9 +133,11 @@ cl::opt Action( "Generate llvm-exegesis tables"), clEnumValN(GenAutomata, "gen-automata", "Generate generic automata"), clEnumValN(GenDirectivesEnumDecl, "gen-directive-decl", - "Generate directive related declaration code"), + "Generate directive related declaration code (header file)"), 
clEnumValN(GenDirectivesEnumImpl, "gen-directive-impl", - "Generate directive related implementation code"))); + "Generate directive related implementation code"), + clEnumValN(GenDirectivesEnumGen, "gen-directive-gen", + "Generate directive related implementation code part"))); cl::OptionCategory PrintEnumsCat("Options for -print-enums"); cl::opt Class("class", cl::desc("Print Enum list for this class"), @@ -265,6 +268,9 @@ bool LLVMTableGenMain(raw_ostream &OS, RecordKeeper &Records) { case GenDirectivesEnumImpl: EmitDirectivesImpl(Records, OS); break; + case GenDirectivesEnumGen: + EmitDirectivesGen(Records, OS); + break; } return false; diff --git a/llvm/utils/TableGen/TableGenBackends.h b/llvm/utils/TableGen/TableGenBackends.h index 9e6171abcabfc..92204f39f8fa0 100644 --- a/llvm/utils/TableGen/TableGenBackends.h +++ b/llvm/utils/TableGen/TableGenBackends.h @@ -92,6 +92,7 @@ void EmitExegesis(RecordKeeper &RK, raw_ostream &OS); void EmitAutomata(RecordKeeper &RK, raw_ostream &OS); void EmitDirectivesDecl(RecordKeeper &RK, raw_ostream &OS); void EmitDirectivesImpl(RecordKeeper &RK, raw_ostream &OS); +void EmitDirectivesGen(RecordKeeper &RK, raw_ostream &OS); } // End llvm namespace From 5d2c3e031a6861b3e95673d0e238c09938dd9c0d Mon Sep 17 00:00:00 2001 From: "Yaxun (Sam) Liu" Date: Sat, 11 Jul 2020 10:04:27 -0400 Subject: [PATCH 025/771] Fix regression due to test hip-version.hip Added RocmInstallationDetector to Darwin and MinGW. Fixed duplicate ROCm detector in ROCm toolchain. 
--- clang/lib/Driver/ToolChains/AMDGPU.cpp | 6 +++--- clang/lib/Driver/ToolChains/AMDGPU.h | 3 --- clang/lib/Driver/ToolChains/Darwin.cpp | 8 +++++++- clang/lib/Driver/ToolChains/Darwin.h | 4 ++++ clang/lib/Driver/ToolChains/FreeBSD.cpp | 5 +++++ clang/lib/Driver/ToolChains/FreeBSD.h | 2 ++ clang/lib/Driver/ToolChains/HIP.cpp | 1 - clang/lib/Driver/ToolChains/MinGW.cpp | 9 ++++++++- clang/lib/Driver/ToolChains/MinGW.h | 4 ++++ clang/test/Driver/hip-version.hip | 12 ++++++++++++ 10 files changed, 45 insertions(+), 9 deletions(-) diff --git a/clang/lib/Driver/ToolChains/AMDGPU.cpp b/clang/lib/Driver/ToolChains/AMDGPU.cpp index cfc71d7810b46..bc6d1fcd4a008 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.cpp +++ b/clang/lib/Driver/ToolChains/AMDGPU.cpp @@ -489,9 +489,9 @@ bool AMDGPUToolChain::isWave64(const llvm::opt::ArgList &DriverArgs, /// ROCM Toolchain ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) - : AMDGPUToolChain(D, Triple, Args), - RocmInstallation(D, Triple, Args, /*DetectHIPRuntime=*/false, - /*DetectDeviceLib=*/true) {} + : AMDGPUToolChain(D, Triple, Args) { + RocmInstallation.detectDeviceLibrary(); +} void AMDGPUToolChain::addClangTargetOptions( const llvm::opt::ArgList &DriverArgs, diff --git a/clang/lib/Driver/ToolChains/AMDGPU.h b/clang/lib/Driver/ToolChains/AMDGPU.h index 71c66188b0456..5d44faf28b053 100644 --- a/clang/lib/Driver/ToolChains/AMDGPU.h +++ b/clang/lib/Driver/ToolChains/AMDGPU.h @@ -90,9 +90,6 @@ class LLVM_LIBRARY_VISIBILITY AMDGPUToolChain : public Generic_ELF { }; class LLVM_LIBRARY_VISIBILITY ROCMToolChain : public AMDGPUToolChain { -protected: - RocmInstallationDetector RocmInstallation; - public: ROCMToolChain(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args); diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 6bf42e6029eb5..2e1190c34ea7a 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ 
b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -779,7 +779,7 @@ MachO::MachO(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) /// Darwin - Darwin tool chain for i386 and x86_64. Darwin::Darwin(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) : MachO(D, Triple, Args), TargetInitialized(false), - CudaInstallation(D, Triple, Args) {} + CudaInstallation(D, Triple, Args), RocmInstallation(D, Triple, Args) {} types::ID MachO::LookupTypeForExtension(StringRef Ext) const { types::ID Ty = ToolChain::LookupTypeForExtension(Ext); @@ -831,6 +831,11 @@ void Darwin::AddCudaIncludeArgs(const ArgList &DriverArgs, CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args); } +void Darwin::AddHIPIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args); +} + // This is just a MachO name translation routine and there's no // way to join this into ARMTargetParser without breaking all // other assumptions. Maybe MachO should consider standardising @@ -2736,4 +2741,5 @@ SanitizerMask Darwin::getSupportedSanitizers() const { void Darwin::printVerboseInfo(raw_ostream &OS) const { CudaInstallation.print(OS); + RocmInstallation.print(OS); } diff --git a/clang/lib/Driver/ToolChains/Darwin.h b/clang/lib/Driver/ToolChains/Darwin.h index a543a8fc27b9d..64c252efea7df 100644 --- a/clang/lib/Driver/ToolChains/Darwin.h +++ b/clang/lib/Driver/ToolChains/Darwin.h @@ -10,6 +10,7 @@ #define LLVM_CLANG_LIB_DRIVER_TOOLCHAINS_DARWIN_H #include "Cuda.h" +#include "ROCm.h" #include "clang/Driver/DarwinSDKInfo.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" @@ -293,6 +294,7 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public MachO { mutable Optional SDKInfo; CudaInstallationDetector CudaInstallation; + RocmInstallationDetector RocmInstallation; private: void AddDeploymentTarget(llvm::opt::DerivedArgList &Args) const; @@ -475,6 +477,8 @@ class LLVM_LIBRARY_VISIBILITY Darwin : public MachO { 
void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; + void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; bool UseObjCMixedDispatch() const override { // This is only used with the non-fragile ABI and non-legacy dispatch. diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index 14cf278c19d9e..909ac5e992129 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -425,6 +425,11 @@ void FreeBSD::AddCudaIncludeArgs(const ArgList &DriverArgs, CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args); } +void FreeBSD::AddHIPIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args); +} + Tool *FreeBSD::buildAssembler() const { return new tools::freebsd::Assembler(*this); } diff --git a/clang/lib/Driver/ToolChains/FreeBSD.h b/clang/lib/Driver/ToolChains/FreeBSD.h index bca3f6b741b6a..abc0876cef260 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.h +++ b/clang/lib/Driver/ToolChains/FreeBSD.h @@ -68,6 +68,8 @@ class LLVM_LIBRARY_VISIBILITY FreeBSD : public Generic_ELF { llvm::opt::ArgStringList &CmdArgs) const override; void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; + void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) const override; llvm::ExceptionHandling GetExceptionModel(const llvm::opt::ArgList &Args) const override; diff --git a/clang/lib/Driver/ToolChains/HIP.cpp b/clang/lib/Driver/ToolChains/HIP.cpp index 32734f5c11809..7d17f809690ea 100644 --- a/clang/lib/Driver/ToolChains/HIP.cpp +++ b/clang/lib/Driver/ToolChains/HIP.cpp @@ -224,7 +224,6 @@ HIPToolChain::HIPToolChain(const Driver &D, const llvm::Triple &Triple, // Lookup binaries into the driver directory, this is used to // 
discover the clang-offload-bundler executable. getProgramPaths().push_back(getDriver().Dir); - RocmInstallation.detectHIPRuntime(); } void HIPToolChain::addClangTargetOptions( diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index b233e210d889e..a1a1b413fb6c6 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -398,7 +398,8 @@ llvm::ErrorOr toolchains::MinGW::findClangRelativeSysroot() { toolchains::MinGW::MinGW(const Driver &D, const llvm::Triple &Triple, const ArgList &Args) - : ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args) { + : ToolChain(D, Triple, Args), CudaInstallation(D, Triple, Args), + RocmInstallation(D, Triple, Args) { getProgramPaths().push_back(getDriver().getInstalledDir()); if (getDriver().SysRoot.size()) @@ -500,8 +501,14 @@ void toolchains::MinGW::AddCudaIncludeArgs(const ArgList &DriverArgs, CudaInstallation.AddCudaIncludeArgs(DriverArgs, CC1Args); } +void toolchains::MinGW::AddHIPIncludeArgs(const ArgList &DriverArgs, + ArgStringList &CC1Args) const { + RocmInstallation.AddHIPIncludeArgs(DriverArgs, CC1Args); +} + void toolchains::MinGW::printVerboseInfo(raw_ostream &OS) const { CudaInstallation.print(OS); + RocmInstallation.print(OS); } // Include directories for various hosts: diff --git a/clang/lib/Driver/ToolChains/MinGW.h b/clang/lib/Driver/ToolChains/MinGW.h index 46264a55cfc7b..2f1559fcf34cd 100644 --- a/clang/lib/Driver/ToolChains/MinGW.h +++ b/clang/lib/Driver/ToolChains/MinGW.h @@ -11,6 +11,7 @@ #include "Cuda.h" #include "Gnu.h" +#include "ROCm.h" #include "clang/Driver/Tool.h" #include "clang/Driver/ToolChain.h" #include "llvm/Support/ErrorOr.h" @@ -81,6 +82,8 @@ class LLVM_LIBRARY_VISIBILITY MinGW : public ToolChain { void AddCudaIncludeArgs(const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args) const override; + void AddHIPIncludeArgs(const llvm::opt::ArgList &DriverArgs, + llvm::opt::ArgStringList &CC1Args) 
const override; void printVerboseInfo(raw_ostream &OS) const override; @@ -91,6 +94,7 @@ class LLVM_LIBRARY_VISIBILITY MinGW : public ToolChain { private: CudaInstallationDetector CudaInstallation; + RocmInstallationDetector RocmInstallation; std::string Base; std::string GccLibDir; diff --git a/clang/test/Driver/hip-version.hip b/clang/test/Driver/hip-version.hip index cf80ae15ac6da..eb1295210cfc1 100644 --- a/clang/test/Driver/hip-version.hip +++ b/clang/test/Driver/hip-version.hip @@ -5,6 +5,10 @@ // RUN: %clang -v --rocm-path=%S/Inputs/rocm 2>&1 \ // RUN: | FileCheck -check-prefixes=FOUND %s +// RUN: %clang -v --rocm-path=%S/Inputs/rocm 2>&1 \ +// RUN: -target amdgcn-amd-amdhsa \ +// RUN: | FileCheck -check-prefixes=FOUND %s + // FOUND: Found HIP installation: {{.*Inputs.*rocm}}, version 3.6.20214-a2917cd // When --rocm-path is set and .hipVersion is not found, use default version @@ -12,11 +16,19 @@ // RUN: %clang -v --rocm-path=%S 2>&1 \ // RUN: | FileCheck -check-prefixes=DEFAULT %s +// RUN: %clang -v --rocm-path=%S 2>&1 \ +// RUN: -target amdgcn-amd-amdhsa \ +// RUN: | FileCheck -check-prefixes=DEFAULT %s + // DEFAULT: Found HIP installation: {{.*Driver}}, version 3.5. 
// RUN: %clang -v --rocm-path=%S --hip-version=3.7.0 2>&1 \ // RUN: | FileCheck -check-prefixes=SPECIFIED %s +// RUN: %clang -v --rocm-path=%S --hip-version=3.7.0 2>&1 \ +// RUN: -target amdgcn-amd-amdhsa \ +// RUN: | FileCheck -check-prefixes=SPECIFIED %s + // SPECIFIED: Found HIP installation: {{.*Driver}}, version 3.7.0 // RUN: %clang -v --rocm-path=%S --hip-version=3.7 2>&1 \ From 5937434677afc5be47977f8d340ff499589f2ef3 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sat, 11 Jul 2020 11:57:17 -0500 Subject: [PATCH 026/771] [OpenMP] Silence unused symbol warning with proper ifdefs --- openmp/libomptarget/deviceRTLs/common/src/reduction.cu | 2 ++ 1 file changed, 2 insertions(+) diff --git a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu index 0230fa26ac107..7604f024eeb4d 100644 --- a/openmp/libomptarget/deviceRTLs/common/src/reduction.cu +++ b/openmp/libomptarget/deviceRTLs/common/src/reduction.cu @@ -54,6 +54,7 @@ INLINE static void gpu_irregular_warp_reduce(void *reduce_data, } } +#if !defined(__CUDA_ARCH__) || __CUDA_ARCH__ < 700 INLINE static uint32_t gpu_irregular_simd_reduce(void *reduce_data, kmp_ShuffleReductFctPtr shflFct) { uint32_t size, remote_id, physical_lane_id; @@ -72,6 +73,7 @@ gpu_irregular_simd_reduce(void *reduce_data, kmp_ShuffleReductFctPtr shflFct) { } while (logical_lane_id % 2 == 0 && size > 1); return (logical_lane_id == 0); } +#endif INLINE static int32_t nvptx_parallel_reduce_nowait( From 8f183d9f3d13d66a679bd449b1f5d34942560028 Mon Sep 17 00:00:00 2001 From: clementval Date: Sat, 11 Jul 2020 12:59:14 -0400 Subject: [PATCH 027/771] [openmp] Remove unused variable in DirectiveEmitter --- llvm/utils/TableGen/DirectiveEmitter.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/TableGen/DirectiveEmitter.cpp b/llvm/utils/TableGen/DirectiveEmitter.cpp index fc4a6757f8086..ebcd6873205ea 100644 --- a/llvm/utils/TableGen/DirectiveEmitter.cpp +++ 
b/llvm/utils/TableGen/DirectiveEmitter.cpp @@ -458,7 +458,6 @@ void EmitDirectivesGen(RecordKeeper &Records, raw_ostream &OS) { DirectiveLanguage->getValueAsString("clauseEnumSetClass"); const auto &Directives = Records.getAllDerivedDefinitions("Directive"); - const auto &Clauses = Records.getAllDerivedDefinitions("Clause"); EmitDirectivesFlangImpl(Directives, OS, LanguageName, ClauseEnumSetClass, DirectivePrefix, ClausePrefix, CppNamespace); From d8c35031a39e7b1bf9524ddd325c7a91dbb05f1d Mon Sep 17 00:00:00 2001 From: Stephen Neuendorffer Date: Sat, 11 Jul 2020 11:47:07 -0700 Subject: [PATCH 028/771] [examples] fix ExceptionDemo Code didn't compile in a release build. Guard debug output with ifndef NDEBUG. Differential Revision: https://reviews.llvm.org/D83628 --- llvm/examples/ExceptionDemo/ExceptionDemo.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/examples/ExceptionDemo/ExceptionDemo.cpp b/llvm/examples/ExceptionDemo/ExceptionDemo.cpp index 0ecb527f4ec05..1b3ec7c91ddee 100644 --- a/llvm/examples/ExceptionDemo/ExceptionDemo.cpp +++ b/llvm/examples/ExceptionDemo/ExceptionDemo.cpp @@ -792,7 +792,7 @@ _Unwind_Reason_Code ourPersonality(int version, _Unwind_Action actions, } #endif - const uint8_t *lsda = _Unwind_GetLanguageSpecificData(context); + const uint8_t *lsda = (const uint8_t *)_Unwind_GetLanguageSpecificData(context); #ifdef DEBUG fprintf(stderr, @@ -1959,11 +1959,13 @@ int main(int argc, char *argv[]) { executionEngine->finalizeObject(); +#ifndef NDEBUG fprintf(stderr, "\nBegin module dump:\n\n"); module->dump(); fprintf(stderr, "\nEnd module dump:\n"); +#endif fprintf(stderr, "\n\nBegin Test:\n"); From 47872adf6ae236c798d05b7229e00f363ab2fe0f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sat, 11 Jul 2020 12:21:41 -0700 Subject: [PATCH 029/771] [X86] Add test cases for missed opportunities to use vpternlog due to a bitcast between the logic ops. 
These test cases fail to use vpternlog because the AND was converted to a blend shuffle and then converted back to AND during shuffle lowering. This results in the AND having a different type than it started with. This prevents our custom matching logic from seeing the two logic ops. --- llvm/test/CodeGen/X86/avx512-logic.ll | 34 +++++++++++++++++++ llvm/test/CodeGen/X86/avx512vl-logic.ll | 44 +++++++++++++++++++++++++ 2 files changed, 78 insertions(+) diff --git a/llvm/test/CodeGen/X86/avx512-logic.ll b/llvm/test/CodeGen/X86/avx512-logic.ll index c2a4da1ba5622..88a3b5aea9bd4 100644 --- a/llvm/test/CodeGen/X86/avx512-logic.ll +++ b/llvm/test/CodeGen/X86/avx512-logic.ll @@ -885,3 +885,37 @@ define <16 x i32> @ternlog_xor_andn(<16 x i32> %x, <16 x i32> %y, <16 x i32> %z) %c = xor <16 x i32> %b, %z ret <16 x i32> %c } + +define <16 x i32> @ternlog_or_and_mask(<16 x i32> %x, <16 x i32> %y) { +; KNL-LABEL: ternlog_or_and_mask: +; KNL: ## %bb.0: +; KNL-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; KNL-NEXT: vpord %zmm1, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: ternlog_or_and_mask: +; SKX: ## %bb.0: +; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: vorps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %a = and <16 x i32> %x, + %b = or <16 x i32> %a, %y + ret <16 x i32> %b +} + +define <8 x i64> @ternlog_xor_and_mask(<8 x i64> %x, <8 x i64> %y) { +; KNL-LABEL: ternlog_xor_and_mask: +; KNL: ## %bb.0: +; KNL-NEXT: vpandd {{.*}}(%rip), %zmm0, %zmm0 +; KNL-NEXT: vpxorq %zmm1, %zmm0, %zmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: ternlog_xor_and_mask: +; SKX: ## %bb.0: +; SKX-NEXT: vandps {{.*}}(%rip), %zmm0, %zmm0 +; SKX-NEXT: vxorps %zmm1, %zmm0, %zmm0 +; SKX-NEXT: retq + %a = and <8 x i64> %x, + %b = xor <8 x i64> %a, %y + ret <8 x i64> %b +} diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll index 0647f4e33bf23..26d905ebeae77 100644 --- a/llvm/test/CodeGen/X86/avx512vl-logic.ll +++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll @@ 
-987,3 +987,47 @@ define <4 x i32> @ternlog_xor_andn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) { %c = xor <4 x i32> %b, %z ret <4 x i32> %c } + +define <4 x i32> @ternlog_or_and_mask(<4 x i32> %x, <4 x i32> %y) { +; CHECK-LABEL: ternlog_or_and_mask: +; CHECK: ## %bb.0: +; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vorps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %a = and <4 x i32> %x, + %b = or <4 x i32> %a, %y + ret <4 x i32> %b +} + +define <8 x i32> @ternlog_or_and_mask_ymm(<8 x i32> %x, <8 x i32> %y) { +; CHECK-LABEL: ternlog_or_and_mask_ymm: +; CHECK: ## %bb.0: +; CHECK-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; CHECK-NEXT: vorps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: retq + %a = and <8 x i32> %x, + %b = or <8 x i32> %a, %y + ret <8 x i32> %b +} + +define <2 x i64> @ternlog_xor_and_mask(<2 x i64> %x, <2 x i64> %y) { +; CHECK-LABEL: ternlog_xor_and_mask: +; CHECK: ## %bb.0: +; CHECK-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 +; CHECK-NEXT: vxorps %xmm1, %xmm0, %xmm0 +; CHECK-NEXT: retq + %a = and <2 x i64> %x, + %b = xor <2 x i64> %a, %y + ret <2 x i64> %b +} + +define <4 x i64> @ternlog_xor_and_mask_ymm(<4 x i64> %x, <4 x i64> %y) { +; CHECK-LABEL: ternlog_xor_and_mask_ymm: +; CHECK: ## %bb.0: +; CHECK-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 +; CHECK-NEXT: vxorps %ymm1, %ymm0, %ymm0 +; CHECK-NEXT: retq + %a = and <4 x i64> %x, + %b = xor <4 x i64> %a, %y + ret <4 x i64> %b +} From 3b04af4d84fbffa6a2e90cfd187ed01092b45684 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 11 Jul 2020 20:05:28 +0000 Subject: [PATCH 030/771] Fix some memory leak in MLIRContext with respect to registered types/attributes interfaces Differential Revision: https://reviews.llvm.org/D83618 --- mlir/lib/IR/MLIRContext.cpp | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/mlir/lib/IR/MLIRContext.cpp b/mlir/lib/IR/MLIRContext.cpp index 4c31ef318fd9b..a4e833cbf77c5 100644 --- a/mlir/lib/IR/MLIRContext.cpp +++ b/mlir/lib/IR/MLIRContext.cpp @@ -331,6 +331,12 @@ class 
MLIRContextImpl { public: MLIRContextImpl() : identifiers(identifierAllocator) {} + ~MLIRContextImpl() { + for (auto typeMapping : registeredTypes) + typeMapping.second->~AbstractType(); + for (auto attrMapping : registeredAttributes) + attrMapping.second->~AbstractAttribute(); + } }; } // end namespace mlir From 44b0b7cf6605c41728f445c363415b9b6f48db04 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 11 Jul 2020 20:05:37 +0000 Subject: [PATCH 031/771] Fix one memory leak in the MLIRParser by using std::unique_ptr to hold the new block pointer This is NFC when there is no parsing error. Differential Revision: https://reviews.llvm.org/D83619 --- mlir/lib/Parser/Parser.cpp | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Parser/Parser.cpp b/mlir/lib/Parser/Parser.cpp index 0e4589a209181..fc9d449ecc143 100644 --- a/mlir/lib/Parser/Parser.cpp +++ b/mlir/lib/Parser/Parser.cpp @@ -1504,7 +1504,8 @@ ParseResult OperationParser::parseRegion( pushSSANameScope(isIsolatedNameScope); // Parse the first block directly to allow for it to be unnamed. - Block *block = new Block(); + auto owning_block = std::make_unique(); + Block *block = owning_block.get(); // Add arguments to the entry block. if (!entryArguments.empty()) { @@ -1519,7 +1520,6 @@ ParseResult OperationParser::parseRegion( } if (addDefinition(placeholderArgPair.first, block->addArgument(placeholderArgPair.second))) { - delete block; return failure(); } } @@ -1530,19 +1530,17 @@ ParseResult OperationParser::parseRegion( } if (parseBlock(block)) { - delete block; return failure(); } // Verify that no other arguments were parsed. if (!entryArguments.empty() && block->getNumArguments() > entryArguments.size()) { - delete block; return emitError("entry block arguments were already defined"); } // Parse the rest of the region. 
- region.push_back(block); + region.push_back(owning_block.release()); if (parseRegionBody(region)) return failure(); From b8409c03ed90807f3d49c7d98dceea98cf461f7a Mon Sep 17 00:00:00 2001 From: Michael Liao Date: Sat, 11 Jul 2020 16:19:09 -0400 Subject: [PATCH 032/771] Fix `-Wreturn-type` warning. NFC. --- clang/lib/Tooling/Syntax/BuildTree.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Tooling/Syntax/BuildTree.cpp b/clang/lib/Tooling/Syntax/BuildTree.cpp index 6d13f1ace83ba..1f192180ec451 100644 --- a/clang/lib/Tooling/Syntax/BuildTree.cpp +++ b/clang/lib/Tooling/Syntax/BuildTree.cpp @@ -750,6 +750,7 @@ class BuildTreeVisitor : public RecursiveASTVisitor { return new (allocator()) syntax::FloatUserDefinedLiteralExpression; } } + llvm_unreachable("Unknown literal operator kind."); } bool WalkUpFromUserDefinedLiteral(UserDefinedLiteral *S) { From 6792069a3fdb412d06dd3cc42a6181c6fb7db860 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 11 Jul 2020 22:51:25 +0200 Subject: [PATCH 033/771] [NewGVN] Regenerate test checks (NFC) --- llvm/test/Transforms/NewGVN/assumes.ll | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/llvm/test/Transforms/NewGVN/assumes.ll b/llvm/test/Transforms/NewGVN/assumes.ll index 065cc0fb62e08..ea20b38bff6af 100644 --- a/llvm/test/Transforms/NewGVN/assumes.ll +++ b/llvm/test/Transforms/NewGVN/assumes.ll @@ -1,16 +1,28 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -newgvn -S | FileCheck %s -; CHECK-LABEL: @test1 -; CHECK: ret i32 %arg define i32 @test1(i32 %arg) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[ARG:%.*]], 5 +; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: ret i32 [[ARG]] +; %cmp = icmp sge i32 %arg, 5 call void @llvm.assume(i1 %cmp) ret i32 %arg } -; CHECK-LABEL: @test2 -; CHECK: ret i32 %arg define i32 @test2(i32 %arg, i1 %b) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: br label [[BB:%.*]] +; 
CHECK: bb: +; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1, [[TMP0:%.*]] ], [ 2, [[BB]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[ARG:%.*]], [[A]] +; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: br i1 [[B:%.*]], label [[BB]], label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: ret i32 [[ARG]] +; br label %bb bb: From 4dbe82eef34e5ab8a9b0dabdbca194ff6858fc7f Mon Sep 17 00:00:00 2001 From: kuter Date: Sun, 12 Jul 2020 02:23:21 +0300 Subject: [PATCH 034/771] [Attributor] Introudce attribute seed allow list. --- llvm/include/llvm/Transforms/IPO/Attributor.h | 22 +++++++++++++ llvm/lib/Transforms/IPO/Attributor.cpp | 13 ++++++++ llvm/test/Transforms/Attributor/allow_list.ll | 33 +++++++++++++++++++ 3 files changed, 68 insertions(+) create mode 100644 llvm/test/Transforms/Attributor/allow_list.ll diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index c6261845b765a..d2666d4b86827 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -891,6 +891,13 @@ struct Attributor { // No matching attribute found, create one. // Use the static create method. auto &AA = AAType::createForPosition(IRP, *this); + + // If we are currenty seeding attributes, enforce seeding rules. + if (SeedingPeriod && !shouldSeedAttribute(AA)) { + AA.getState().indicatePessimisticFixpoint(); + return AA; + } + registerAA(AA); // For now we ignore naked and optnone functions. @@ -918,8 +925,15 @@ struct Attributor { return AA; } + // Allow seeded attributes to declare dependencies. + // Remember the seeding state. 
+ bool OldSeedingPeriod = SeedingPeriod; + SeedingPeriod = false; + updateAA(AA); + SeedingPeriod = OldSeedingPeriod; + if (TrackDependence && AA.getState().isValidState()) recordDependence(AA, const_cast(*QueryingAA), DepClass); @@ -1345,6 +1359,10 @@ struct Attributor { ChangeStatus rewriteFunctionSignatures(SmallPtrSetImpl &ModifiedFns); + /// Check if the Attribute \p AA should be seeded. + /// See getOrCreateAAFor. + bool shouldSeedAttribute(AbstractAttribute &AA); + /// The set of all abstract attributes. ///{ using AAVector = SmallVector; @@ -1410,6 +1428,10 @@ struct Attributor { /// Invoke instructions with at least a single dead successor block. SmallVector InvokeWithDeadSuccessor; + /// Wheather attributes are being `seeded`, always false after ::run function + /// gets called \see getOrCreateAAFor. + bool SeedingPeriod = true; + /// Functions, blocks, and instructions we delete after manifest is done. /// ///{ diff --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp index 7f252079e0532..6e5625d26c38b 100644 --- a/llvm/lib/Transforms/IPO/Attributor.cpp +++ b/llvm/lib/Transforms/IPO/Attributor.cpp @@ -78,6 +78,12 @@ static cl::opt "wrappers for non-exact definitions."), cl::init(false)); +static cl::list + SeedAllowList("attributor-seed-allow-list", cl::Hidden, + cl::desc("Comma seperated list of attrbute names that are " + "allowed to be seeded."), + cl::ZeroOrMore, cl::CommaSeparated); + /// Logic operators for the change status enum class. 
/// ///{ @@ -1256,6 +1262,7 @@ ChangeStatus Attributor::cleanupIR() { } ChangeStatus Attributor::run() { + SeedingPeriod = false; runTillFixpoint(); ChangeStatus ManifestChange = manifestAttributes(); ChangeStatus CleanupChange = cleanupIR(); @@ -1452,6 +1459,12 @@ bool Attributor::registerFunctionSignatureRewrite( return true; } +bool Attributor::shouldSeedAttribute(AbstractAttribute &AA) { + if (SeedAllowList.size() == 0) + return true; + return std::count(SeedAllowList.begin(), SeedAllowList.end(), AA.getName()); +} + ChangeStatus Attributor::rewriteFunctionSignatures( SmallPtrSetImpl &ModifiedFns) { ChangeStatus Changed = ChangeStatus::UNCHANGED; diff --git a/llvm/test/Transforms/Attributor/allow_list.ll b/llvm/test/Transforms/Attributor/allow_list.ll new file mode 100644 index 0000000000000..7670090cb03b7 --- /dev/null +++ b/llvm/test/Transforms/Attributor/allow_list.ll @@ -0,0 +1,33 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes +; RUN: opt -S -passes=attributor --attributor-seed-allow-list asd < %s | FileCheck %s --check-prefixes=CHECK_DISABLED +; RUN: opt -S -passes=attributor --attributor-seed-allow-list AAValueSimplify < %s | FileCheck %s --check-prefixes=CHECK_ENABLED + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +; Function Attrs: nounwind uwtable +define internal i32 @range_test(i32 %a) #0 { +; CHECK_DISABLED-LABEL: define {{[^@]+}}@range_test +; CHECK_DISABLED-SAME: (i32 [[A:%.*]]) +; CHECK_DISABLED-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[A]], 100 +; CHECK_DISABLED-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i32 +; CHECK_DISABLED-NEXT: ret i32 [[TMP2]] +; + %1 = icmp sgt i32 %a, 100 + %2 = zext i1 %1 to i32 + ret i32 %2 +} + +; Function Attrs: nounwind uwtable +define i32 @range_use() #0 { +; CHECK_DISABLED-LABEL: define {{[^@]+}}@range_use() +; CHECK_DISABLED-NEXT: [[TMP1:%.*]] = call i32 @range_test(i32 123) +; CHECK_DISABLED-NEXT: ret i32 [[TMP1]] +; +; 
CHECK_ENABLED-LABEL: define {{[^@]+}}@range_use() +; CHECK_ENABLED-NEXT: ret i32 1 +; + %1 = call i32 @range_test(i32 123) + ret i32 %1 +} + +attributes #0 = { nounwind uwtable noinline } \ No newline at end of file From d1bcddb5c1fe7135e712b0e08874ed64c70f3e49 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 11 Jul 2020 16:44:34 -0700 Subject: [PATCH 035/771] [llvm-objdump][test] Move tests after dc4a6f5db4f0178bae43ef615cc8902c759d6195 Move RISCV/ to ELF/RISCV/ as well. --- llvm/test/tools/llvm-objdump/ARM/lit.local.cfg | 2 -- llvm/test/tools/llvm-objdump/{ => ELF}/ARM/Inputs/debug.c | 0 llvm/test/tools/llvm-objdump/{ => ELF}/ARM/Inputs/wide-char.c | 0 .../llvm-objdump/{ => ELF}/ARM/debug-vars-dwarf4-sections.s | 0 llvm/test/tools/llvm-objdump/{ => ELF}/ARM/debug-vars-dwarf4.s | 0 .../llvm-objdump/{ => ELF}/ARM/debug-vars-dwarf5-sections.s | 0 llvm/test/tools/llvm-objdump/{ => ELF}/ARM/debug-vars-dwarf5.s | 0 .../tools/llvm-objdump/{ => ELF}/ARM/debug-vars-wide-chars.s | 0 llvm/test/tools/llvm-objdump/{ => ELF}/PowerPC/debug-vars.s | 0 llvm/test/tools/llvm-objdump/{ => ELF}/RISCV/lit.local.cfg | 0 .../tools/llvm-objdump/{ => ELF}/RISCV/unknown-arch-attr.test | 0 llvm/test/tools/llvm-objdump/PowerPC/lit.local.cfg | 2 -- 12 files changed, 4 deletions(-) delete mode 100644 llvm/test/tools/llvm-objdump/ARM/lit.local.cfg rename llvm/test/tools/llvm-objdump/{ => ELF}/ARM/Inputs/debug.c (100%) rename llvm/test/tools/llvm-objdump/{ => ELF}/ARM/Inputs/wide-char.c (100%) rename llvm/test/tools/llvm-objdump/{ => ELF}/ARM/debug-vars-dwarf4-sections.s (100%) rename llvm/test/tools/llvm-objdump/{ => ELF}/ARM/debug-vars-dwarf4.s (100%) rename llvm/test/tools/llvm-objdump/{ => ELF}/ARM/debug-vars-dwarf5-sections.s (100%) rename llvm/test/tools/llvm-objdump/{ => ELF}/ARM/debug-vars-dwarf5.s (100%) rename llvm/test/tools/llvm-objdump/{ => ELF}/ARM/debug-vars-wide-chars.s (100%) rename llvm/test/tools/llvm-objdump/{ => ELF}/PowerPC/debug-vars.s (100%) rename 
llvm/test/tools/llvm-objdump/{ => ELF}/RISCV/lit.local.cfg (100%) rename llvm/test/tools/llvm-objdump/{ => ELF}/RISCV/unknown-arch-attr.test (100%) delete mode 100644 llvm/test/tools/llvm-objdump/PowerPC/lit.local.cfg diff --git a/llvm/test/tools/llvm-objdump/ARM/lit.local.cfg b/llvm/test/tools/llvm-objdump/ARM/lit.local.cfg deleted file mode 100644 index 236e1d3441665..0000000000000 --- a/llvm/test/tools/llvm-objdump/ARM/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'ARM' in config.root.targets: - config.unsupported = True diff --git a/llvm/test/tools/llvm-objdump/ARM/Inputs/debug.c b/llvm/test/tools/llvm-objdump/ELF/ARM/Inputs/debug.c similarity index 100% rename from llvm/test/tools/llvm-objdump/ARM/Inputs/debug.c rename to llvm/test/tools/llvm-objdump/ELF/ARM/Inputs/debug.c diff --git a/llvm/test/tools/llvm-objdump/ARM/Inputs/wide-char.c b/llvm/test/tools/llvm-objdump/ELF/ARM/Inputs/wide-char.c similarity index 100% rename from llvm/test/tools/llvm-objdump/ARM/Inputs/wide-char.c rename to llvm/test/tools/llvm-objdump/ELF/ARM/Inputs/wide-char.c diff --git a/llvm/test/tools/llvm-objdump/ARM/debug-vars-dwarf4-sections.s b/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf4-sections.s similarity index 100% rename from llvm/test/tools/llvm-objdump/ARM/debug-vars-dwarf4-sections.s rename to llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf4-sections.s diff --git a/llvm/test/tools/llvm-objdump/ARM/debug-vars-dwarf4.s b/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf4.s similarity index 100% rename from llvm/test/tools/llvm-objdump/ARM/debug-vars-dwarf4.s rename to llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf4.s diff --git a/llvm/test/tools/llvm-objdump/ARM/debug-vars-dwarf5-sections.s b/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf5-sections.s similarity index 100% rename from llvm/test/tools/llvm-objdump/ARM/debug-vars-dwarf5-sections.s rename to llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf5-sections.s diff --git 
a/llvm/test/tools/llvm-objdump/ARM/debug-vars-dwarf5.s b/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf5.s similarity index 100% rename from llvm/test/tools/llvm-objdump/ARM/debug-vars-dwarf5.s rename to llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-dwarf5.s diff --git a/llvm/test/tools/llvm-objdump/ARM/debug-vars-wide-chars.s b/llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-wide-chars.s similarity index 100% rename from llvm/test/tools/llvm-objdump/ARM/debug-vars-wide-chars.s rename to llvm/test/tools/llvm-objdump/ELF/ARM/debug-vars-wide-chars.s diff --git a/llvm/test/tools/llvm-objdump/PowerPC/debug-vars.s b/llvm/test/tools/llvm-objdump/ELF/PowerPC/debug-vars.s similarity index 100% rename from llvm/test/tools/llvm-objdump/PowerPC/debug-vars.s rename to llvm/test/tools/llvm-objdump/ELF/PowerPC/debug-vars.s diff --git a/llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg b/llvm/test/tools/llvm-objdump/ELF/RISCV/lit.local.cfg similarity index 100% rename from llvm/test/tools/llvm-objdump/RISCV/lit.local.cfg rename to llvm/test/tools/llvm-objdump/ELF/RISCV/lit.local.cfg diff --git a/llvm/test/tools/llvm-objdump/RISCV/unknown-arch-attr.test b/llvm/test/tools/llvm-objdump/ELF/RISCV/unknown-arch-attr.test similarity index 100% rename from llvm/test/tools/llvm-objdump/RISCV/unknown-arch-attr.test rename to llvm/test/tools/llvm-objdump/ELF/RISCV/unknown-arch-attr.test diff --git a/llvm/test/tools/llvm-objdump/PowerPC/lit.local.cfg b/llvm/test/tools/llvm-objdump/PowerPC/lit.local.cfg deleted file mode 100644 index 091332439b186..0000000000000 --- a/llvm/test/tools/llvm-objdump/PowerPC/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if not 'PowerPC' in config.root.targets: - config.unsupported = True From 77272d177a2d7128cf09dc2d27b353cc3e1ecae0 Mon Sep 17 00:00:00 2001 From: Zequan Wu Date: Sat, 11 Jul 2020 20:49:26 -0700 Subject: [PATCH 036/771] [COFF] Fix endianness of .llvm.call-graph-profile section data --- llvm/lib/MC/WinCOFFObjectWriter.cpp | 6 +++--- 1 file 
changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/MC/WinCOFFObjectWriter.cpp b/llvm/lib/MC/WinCOFFObjectWriter.cpp index 94a8d56c55fce..4796ef531054b 100644 --- a/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -1116,9 +1116,9 @@ uint64_t WinCOFFObjectWriter::writeObject(MCAssembler &Asm, for (const MCAssembler::CGProfileEntry &CGPE : Asm.CGProfile) { uint32_t FromIndex = CGPE.From->getSymbol().getIndex(); uint32_t ToIndex = CGPE.To->getSymbol().getIndex(); - OS.write((const char *)&FromIndex, sizeof(uint32_t)); - OS.write((const char *)&ToIndex, sizeof(uint32_t)); - OS.write((const char *)&CGPE.Count, sizeof(uint64_t)); + support::endian::write(OS, FromIndex, W.Endian); + support::endian::write(OS, ToIndex, W.Endian); + support::endian::write(OS, CGPE.Count, W.Endian); } } From 6634aef71f3b5e9820d2955bd6b39d2744de06eb Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 12 Jul 2020 10:12:48 +0200 Subject: [PATCH 037/771] [SCCP] Add test for predicate info condition handling (NFC) --- .../Transforms/SCCP/predicateinfo-cond.ll | 110 ++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 llvm/test/Transforms/SCCP/predicateinfo-cond.ll diff --git a/llvm/test/Transforms/SCCP/predicateinfo-cond.ll b/llvm/test/Transforms/SCCP/predicateinfo-cond.ll new file mode 100644 index 0000000000000..d8528918babed --- /dev/null +++ b/llvm/test/Transforms/SCCP/predicateinfo-cond.ll @@ -0,0 +1,110 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -ipsccp < %s | FileCheck %s + +; Test that information about the true/false value of conditions themselves +; is also used, not information implied by comparisions. 
+ +define i32 @switch(i32 %x) { +; CHECK-LABEL: @switch( +; CHECK-NEXT: switch i32 [[X:%.*]], label [[CASE_DEFAULT:%.*]] [ +; CHECK-NEXT: i32 0, label [[CASE_0:%.*]] +; CHECK-NEXT: i32 2, label [[CASE_2:%.*]] +; CHECK-NEXT: ] +; CHECK: case.0: +; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 1 +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: case.2: +; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X]], 1 +; CHECK-NEXT: br label [[END]] +; CHECK: case.default: +; CHECK-NEXT: br label [[END]] +; CHECK: end: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[ADD]], [[CASE_0]] ], [ [[SUB]], [[CASE_2]] ], [ 1, [[CASE_DEFAULT]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; + switch i32 %x, label %case.default [ + i32 0, label %case.0 + i32 2, label %case.2 + ] + +case.0: + %add = add i32 %x, 1 + br label %end + +case.2: + %sub = sub i32 %x, 1 + br label %end + +case.default: + br label %end + +end: + %phi = phi i32 [ %add, %case.0 ], [ %sub, %case.2 ], [ 1, %case.default] + ret i32 %phi +} + +define i1 @assume(i32 %x) { +; CHECK-LABEL: @assume( +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp sge i32 %x, 0 + call void @llvm.assume(i1 %cmp) + ret i1 %cmp +} + +define i32 @branch(i32 %x) { +; CHECK-LABEL: @branch( +; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN1:%.*]], label [[IF_THEN2:%.*]] +; CHECK: if.then1: +; CHECK-NEXT: br i1 [[CMP]], label [[IF2_THEN1:%.*]], label [[IF2_THEN2:%.*]] +; CHECK: if2.then1: +; CHECK-NEXT: br label [[IF2_END:%.*]] +; CHECK: if2.then2: +; CHECK-NEXT: br label [[IF2_END]] +; CHECK: if2.end: +; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[IF2_THEN1]] ], [ 1, [[IF2_THEN2]] ] +; CHECK-NEXT: ret i32 [[PHI]] +; CHECK: if.then2: +; CHECK-NEXT: br i1 [[CMP]], label [[IF3_THEN1:%.*]], label [[IF3_THEN2:%.*]] +; CHECK: if3.then1: +; CHECK-NEXT: br label [[IF3_END:%.*]] +; CHECK: if3.then2: +; CHECK-NEXT: br label [[IF3_END]] +; CHECK: 
if3.end: +; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 0, [[IF3_THEN1]] ], [ 1, [[IF3_THEN2]] ] +; CHECK-NEXT: ret i32 [[PHI2]] +; + %cmp = icmp sge i32 %x, 0 + br i1 %cmp, label %if.then1, label %if.then2 + +if.then1: + br i1 %cmp, label %if2.then1, label %if2.then2 + +if2.then1: + br label %if2.end + +if2.then2: + br label %if2.end + +if2.end: + %phi = phi i32 [ 0, %if2.then1 ], [ 1, %if2.then2 ] + ret i32 %phi + +if.then2: + br i1 %cmp, label %if3.then1, label %if3.then2 + +if3.then1: + br label %if3.end + +if3.then2: + br label %if3.end + +if3.end: + %phi2 = phi i32 [ 0, %if3.then1 ], [ 1, %if3.then2 ] + ret i32 %phi2 +} + +declare void @llvm.assume(i1) From 66f1dcd872dba189ee054fb016f4bff535fb5afc Mon Sep 17 00:00:00 2001 From: Ten Tzen Date: Sun, 12 Jul 2020 01:37:56 -0700 Subject: [PATCH 038/771] [Windows SEH] Fix the frame-ptr of a nested-filter within a _finally This change fixed a SEH bug (exposed by test58 & test61 in MSVC test xcpt4u.c); when an Except-filter is located inside a finally, the frame-pointer generated today via intrinsic @llvm.eh.recoverfp is the frame-pointer of the immediate parent _finally, not the frame-ptr of outermost host function. The fix is to retrieve the Establisher's frame-pointer that was previously saved in parent's frame. 
The prolog of a filter inside a _finally should be like code below: %0 = call i8* @llvm.eh.recoverfp(i8* bitcast (@"?fin$0@0@main@@"), i8*%frame_pointer) %1 = call i8* @llvm.localrecover(i8* bitcast (@"?fin$0@0@main@@"), i8*%0, i32 0) %2 = bitcast i8* %1 to i8** %3 = load i8*, i8** %2, align 8 Differential Revision: https://reviews.llvm.org/D77982 --- clang/lib/CodeGen/CGException.cpp | 43 +++++++++++++++++++ clang/lib/CodeGen/CodeGenFunction.h | 3 ++ .../CodeGen/windows-seh-filter-inFinally.c | 36 ++++++++++++++++ 3 files changed, 82 insertions(+) create mode 100644 clang/test/CodeGen/windows-seh-filter-inFinally.c diff --git a/clang/lib/CodeGen/CGException.cpp b/clang/lib/CodeGen/CGException.cpp index 2494f38b3159c..bdf70252b5ade 100644 --- a/clang/lib/CodeGen/CGException.cpp +++ b/clang/lib/CodeGen/CGException.cpp @@ -1815,6 +1815,48 @@ void CodeGenFunction::EmitCapturedLocals(CodeGenFunction &ParentCGF, llvm::Constant *ParentI8Fn = llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy); ParentFP = Builder.CreateCall(RecoverFPIntrin, {ParentI8Fn, EntryFP}); + + // if the parent is a _finally, the passed-in ParentFP is the FP + // of parent _finally, not Establisher's FP (FP of outermost function). + // Establkisher FP is 2nd paramenter passed into parent _finally. + // Fortunately, it's always saved in parent's frame. The following + // code retrieves it, and escapes it so that spill instruction won't be + // optimized away. + if (ParentCGF.ParentCGF != nullptr) { + // Locate and escape Parent's frame_pointer.addr alloca + // Depending on target, should be 1st/2nd one in LocalDeclMap. + // Let's just scan for ImplicitParamDecl with VoidPtrTy. 
+ llvm::AllocaInst *FramePtrAddrAlloca = nullptr; + for (auto &I : ParentCGF.LocalDeclMap) { + const VarDecl *D = cast(I.first); + if (isa(D) && + D->getType() == getContext().VoidPtrTy) { + assert(D->getName().startswith("frame_pointer")); + FramePtrAddrAlloca = cast(I.second.getPointer()); + break; + } + } + assert(FramePtrAddrAlloca); + auto InsertPair = ParentCGF.EscapedLocals.insert( + std::make_pair(FramePtrAddrAlloca, ParentCGF.EscapedLocals.size())); + int FrameEscapeIdx = InsertPair.first->second; + + // an example of a filter's prolog:: + // %0 = call i8* @llvm.eh.recoverfp(bitcast(@"?fin$0@0@main@@"),..) + // %1 = call i8* @llvm.localrecover(bitcast(@"?fin$0@0@main@@"),..) + // %2 = bitcast i8* %1 to i8** + // %3 = load i8*, i8* *%2, align 8 + // ==> %3 is the frame-pointer of outermost host function + llvm::Function *FrameRecoverFn = llvm::Intrinsic::getDeclaration( + &CGM.getModule(), llvm::Intrinsic::localrecover); + llvm::Constant *ParentI8Fn = + llvm::ConstantExpr::getBitCast(ParentCGF.CurFn, Int8PtrTy); + ParentFP = Builder.CreateCall( + FrameRecoverFn, {ParentI8Fn, ParentFP, + llvm::ConstantInt::get(Int32Ty, FrameEscapeIdx)}); + ParentFP = Builder.CreateBitCast(ParentFP, CGM.VoidPtrPtrTy); + ParentFP = Builder.CreateLoad(Address(ParentFP, getPointerAlign())); + } } // Create llvm.localrecover calls for all captures. @@ -2013,6 +2055,7 @@ void CodeGenFunction::pushSEHCleanup(CleanupKind Kind, void CodeGenFunction::EnterSEHTryStmt(const SEHTryStmt &S) { CodeGenFunction HelperCGF(CGM, /*suppressNewContext=*/true); + HelperCGF.ParentCGF = this; if (const SEHFinallyStmt *Finally = S.getFinallyHandler()) { // Outline the finally block. 
llvm::Function *FinallyFunc = diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index b1841d646643c..1fc2ed76ca9e6 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -264,6 +264,9 @@ class CodeGenFunction : public CodeGenTypeCache { CodeGenModule &CGM; // Per-module state. const TargetInfo &Target; + // For EH/SEH outlined funclets, this field points to parent's CGF + CodeGenFunction *ParentCGF = nullptr; + typedef std::pair ComplexPairTy; LoopInfoStack LoopStack; CGBuilderTy Builder; diff --git a/clang/test/CodeGen/windows-seh-filter-inFinally.c b/clang/test/CodeGen/windows-seh-filter-inFinally.c new file mode 100644 index 0000000000000..f9dfca14f0209 --- /dev/null +++ b/clang/test/CodeGen/windows-seh-filter-inFinally.c @@ -0,0 +1,36 @@ +// RUN: %clang_cc1 -triple x86_64-windows -fms-extensions -Wno-implicit-function-declaration -S -emit-llvm %s -o - | FileCheck %s + +// CHECK: %[[dst:[0-9-]+]] = call i8* @llvm.eh.recoverfp(i8* bitcast (void (i8, i8*)* @"?fin$0@0@main@@" to i8*), i8* %frame_pointer) +// CHECK-NEXT: %[[dst1:[0-9-]+]] = call i8* @llvm.localrecover(i8* bitcast (void (i8, i8*)* @"?fin$0@0@main@@" to i8*), i8* %[[dst]], i32 0) +// CHECK-NEXT: %[[dst2:[0-9-]+]] = bitcast i8* %[[dst1]] to i8** +// CHECK-NEXT: = load i8*, i8** %[[dst2]], align 8 + +int +main(int argc, char *argv[]) +{ + int Counter = 0; + // + // Try/except within the finally clause of a try/finally. 
+ // + __try { + Counter -= 1; + } + __finally { + __try { + Counter += 2; + // RtlRaiseStatus(STATUS_INTEGER_OVERFLOW); + } __except(Counter) { + __try { + Counter += 3; + } + __finally { + if (abnormal_termination() == 1) { + Counter += 5; + } + } + } + } + // expect Counter == 9 + return 1; +} + From 39009a8245dae78250081b16fc679ce338af405a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 12 Jul 2020 08:51:49 -0400 Subject: [PATCH 039/771] [DAGCombiner] tighten fast-math constraints for fma fold fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E) This is only allowed when "reassoc" is present on the fadd. As discussed in D80801, this transform goes beyond what is allowed by "contract" FMF (-ffp-contract=fast). That is because we are fusing the trailing add of 'E' with a multiply, but without "reassoc", the code mandates that the products A*B and C*D are added together before adding in 'E'. I've added this example to the LangRef to try to clarify the meaning of "contract". If that seems reasonable, we should probably do something similar for the clang docs because there does not appear to be any formal spec for the behavior of -ffp-contract=fast. Differential Revision: https://reviews.llvm.org/D82499 --- llvm/docs/LangRef.rst | 4 +++- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 +++++-- llvm/test/CodeGen/AArch64/fadd-combines.ll | 9 +++++++-- llvm/test/CodeGen/X86/fma_patterns.ll | 19 +++++++++++++------ 4 files changed, 28 insertions(+), 11 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index c2d6200e67fa8..86d315be74bcf 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -2778,7 +2778,9 @@ floating-point transformations. ``contract`` Allow floating-point contraction (e.g. fusing a multiply followed by an - addition into a fused multiply-and-add). + addition into a fused multiply-and-add). This does not enable reassociating + to form arbitrary contractions. 
For example, ``(a*b) + (c*d) + e`` can not + be transformed into ``(a*b) + ((c*d) + e)`` to create two fma operations. ``afn`` Approximate functions - Allow substitution of approximate calculations for diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0d84cd89f5aee..42e6e12f3f027 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -11986,6 +11986,8 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { SDNodeFlags Flags = N->getFlags(); bool CanFuse = Options.UnsafeFPMath || isContractable(N); + bool CanReassociate = + Options.UnsafeFPMath || N->getFlags().hasAllowReassociation(); bool AllowFusionGlobally = (Options.AllowFPOpFusion == FPOpFusion::Fast || CanFuse || HasFMAD); // If the addition is not contractable, do not combine. @@ -12028,13 +12030,14 @@ SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) { // fadd (fma A, B, (fmul C, D)), E --> fma A, B, (fma C, D, E) // fadd E, (fma A, B, (fmul C, D)) --> fma A, B, (fma C, D, E) + // This requires reassociation because it changes the order of operations. 
SDValue FMA, E; - if (CanFuse && N0.getOpcode() == PreferredFusedOpcode && + if (CanReassociate && N0.getOpcode() == PreferredFusedOpcode && N0.getOperand(2).getOpcode() == ISD::FMUL && N0.hasOneUse() && N0.getOperand(2).hasOneUse()) { FMA = N0; E = N1; - } else if (CanFuse && N1.getOpcode() == PreferredFusedOpcode && + } else if (CanReassociate && N1.getOpcode() == PreferredFusedOpcode && N1.getOperand(2).getOpcode() == ISD::FMUL && N1.hasOneUse() && N1.getOperand(2).hasOneUse()) { FMA = N1; diff --git a/llvm/test/CodeGen/AArch64/fadd-combines.ll b/llvm/test/CodeGen/AArch64/fadd-combines.ll index 0e4f2c02c3110..2ff4858307802 100644 --- a/llvm/test/CodeGen/AArch64/fadd-combines.ll +++ b/llvm/test/CodeGen/AArch64/fadd-combines.ll @@ -207,6 +207,10 @@ define double @fadd_fma_fmul_1(double %a, double %b, double %c, double %d, doubl ret double %a2 } +; Minimum FMF - the 1st fadd is contracted because that combines +; fmul+fadd as specified by the order of operations; the 2nd fadd +; requires reassociation to fuse with c*d. + define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n0) nounwind { ; CHECK-LABEL: fadd_fma_fmul_fmf: ; CHECK: // %bb.0: @@ -220,13 +224,14 @@ define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n ret float %a2 } -; Minimum FMF, commute final add operands, change type. +; Not minimum FMF. 
define float @fadd_fma_fmul_2(float %a, float %b, float %c, float %d, float %n0) nounwind { ; CHECK-LABEL: fadd_fma_fmul_2: ; CHECK: // %bb.0: -; CHECK-NEXT: fmadd s2, s2, s3, s4 +; CHECK-NEXT: fmul s2, s2, s3 ; CHECK-NEXT: fmadd s0, s0, s1, s2 +; CHECK-NEXT: fadd s0, s4, s0 ; CHECK-NEXT: ret %m1 = fmul float %a, %b %m2 = fmul float %c, %d diff --git a/llvm/test/CodeGen/X86/fma_patterns.ll b/llvm/test/CodeGen/X86/fma_patterns.ll index 3049365b6f328..43b1f4a79aff8 100644 --- a/llvm/test/CodeGen/X86/fma_patterns.ll +++ b/llvm/test/CodeGen/X86/fma_patterns.ll @@ -1821,6 +1821,10 @@ define double @fadd_fma_fmul_1(double %a, double %b, double %c, double %d, doubl ret double %a2 } +; Minimum FMF - the 1st fadd is contracted because that combines +; fmul+fadd as specified by the order of operations; the 2nd fadd +; requires reassociation to fuse with c*d. + define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n0) nounwind { ; FMA-LABEL: fadd_fma_fmul_fmf: ; FMA: # %bb.0: @@ -1846,25 +1850,28 @@ define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n ret float %a2 } -; Minimum FMF, commute final add operands, change type. +; Not minimum FMF. 
define float @fadd_fma_fmul_2(float %a, float %b, float %c, float %d, float %n0) nounwind { ; FMA-LABEL: fadd_fma_fmul_2: ; FMA: # %bb.0: -; FMA-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 -; FMA-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; FMA-NEXT: vmulss %xmm3, %xmm2, %xmm2 +; FMA-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2 +; FMA-NEXT: vaddss %xmm2, %xmm4, %xmm0 ; FMA-NEXT: retq ; ; FMA4-LABEL: fadd_fma_fmul_2: ; FMA4: # %bb.0: -; FMA4-NEXT: vfmaddss {{.*#+}} xmm2 = (xmm2 * xmm3) + xmm4 +; FMA4-NEXT: vmulss %xmm3, %xmm2, %xmm2 ; FMA4-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 +; FMA4-NEXT: vaddss %xmm0, %xmm4, %xmm0 ; FMA4-NEXT: retq ; ; AVX512-LABEL: fadd_fma_fmul_2: ; AVX512: # %bb.0: -; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 -; AVX512-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 +; AVX512-NEXT: vmulss %xmm3, %xmm2, %xmm2 +; AVX512-NEXT: vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2 +; AVX512-NEXT: vaddss %xmm2, %xmm4, %xmm0 ; AVX512-NEXT: retq %m1 = fmul float %a, %b %m2 = fmul float %c, %d From 032810f58986cd568980227c9531de91d8bcb1cd Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Sat, 11 Jul 2020 15:15:22 -0700 Subject: [PATCH 040/771] [NFC] Fix comment style in MLIR unittests to conform to LLVM coding standards. 
Differential Revision: https://reviews.llvm.org/D83632 --- .../Dialect/SPIRV/DeserializationTest.cpp | 10 +++--- mlir/unittests/SDBM/SDBMTest.cpp | 2 +- mlir/unittests/TableGen/EnumsGenTest.cpp | 5 ++- mlir/unittests/TableGen/FormatTest.cpp | 18 +++++----- mlir/unittests/TableGen/StructsGenTest.cpp | 34 +++++++++---------- 5 files changed, 34 insertions(+), 35 deletions(-) diff --git a/mlir/unittests/Dialect/SPIRV/DeserializationTest.cpp b/mlir/unittests/Dialect/SPIRV/DeserializationTest.cpp index a81b7741deea0..fe5632d7ae165 100644 --- a/mlir/unittests/Dialect/SPIRV/DeserializationTest.cpp +++ b/mlir/unittests/Dialect/SPIRV/DeserializationTest.cpp @@ -25,7 +25,7 @@ using namespace mlir; -// Load the SPIRV dialect +/// Load the SPIRV dialect. static DialectRegistration SPIRVRegistration; using ::testing::StrEq; @@ -159,7 +159,7 @@ TEST_F(DeserializationTest, InsufficientWordFailure) { addHeader(); binary.push_back((2u << 16) | static_cast(spirv::Opcode::OpTypeVoid)); - // Missing word for type + // Missing word for type . ASSERT_FALSE(deserialize()); expectDiagnostic("insufficient words for the last instruction"); @@ -248,7 +248,7 @@ TEST_F(DeserializationTest, FunctionMissingEndFailure) { auto voidType = addVoidType(); auto fnType = addFunctionType(voidType, {}); addFunction(voidType, fnType); - // Missing OpFunctionEnd + // Missing OpFunctionEnd. ASSERT_FALSE(deserialize()); expectDiagnostic("expected OpFunctionEnd instruction"); @@ -260,7 +260,7 @@ TEST_F(DeserializationTest, FunctionMissingParameterFailure) { auto i32Type = addIntType(32); auto fnType = addFunctionType(voidType, {i32Type}); addFunction(voidType, fnType); - // Missing OpFunctionParameter + // Missing OpFunctionParameter. 
ASSERT_FALSE(deserialize()); expectDiagnostic("expected OpFunctionParameter instruction"); @@ -271,7 +271,7 @@ TEST_F(DeserializationTest, FunctionMissingLabelForFirstBlockFailure) { auto voidType = addVoidType(); auto fnType = addFunctionType(voidType, {}); addFunction(voidType, fnType); - // Missing OpLabel + // Missing OpLabel. addReturn(); addFunctionEnd(); diff --git a/mlir/unittests/SDBM/SDBMTest.cpp b/mlir/unittests/SDBM/SDBMTest.cpp index e599094ee5929..61d670650b4bf 100644 --- a/mlir/unittests/SDBM/SDBMTest.cpp +++ b/mlir/unittests/SDBM/SDBMTest.cpp @@ -17,7 +17,7 @@ using namespace mlir; -// Load the SDBM dialect +/// Load the SDBM dialect. static DialectRegistration SDBMRegistration; static MLIRContext *ctx() { diff --git a/mlir/unittests/TableGen/EnumsGenTest.cpp b/mlir/unittests/TableGen/EnumsGenTest.cpp index 47f0910403b2f..a5580197a0a39 100644 --- a/mlir/unittests/TableGen/EnumsGenTest.cpp +++ b/mlir/unittests/TableGen/EnumsGenTest.cpp @@ -13,12 +13,11 @@ #include "gmock/gmock.h" #include -// Pull in generated enum utility declarations +/// Pull in generated enum utility declarations and definitions. #include "EnumsGenTest.h.inc" -// And definitions #include "EnumsGenTest.cpp.inc" -// Test namespaces and enum class/utility names +/// Test namespaces and enum class/utility names. using Outer::Inner::ConvertToEnum; using Outer::Inner::ConvertToString; using Outer::Inner::StrEnum; diff --git a/mlir/unittests/TableGen/FormatTest.cpp b/mlir/unittests/TableGen/FormatTest.cpp index ee609291150c7..0cae408bc3fb8 100644 --- a/mlir/unittests/TableGen/FormatTest.cpp +++ b/mlir/unittests/TableGen/FormatTest.cpp @@ -19,14 +19,14 @@ TEST(FormatTest, EmptyFmtStr) { EXPECT_TRUE(result.empty()); } -// Allow extra unused positional parameters +/// Allow extra unused positional parameters. 
TEST(FormatTest, EmptyFmtStrExtraParams) { FmtContext ctx; std::string result = std::string(tgfmt("", &ctx, "a", "b", "c")); EXPECT_TRUE(result.empty()); } -// Allow unused placeholder substitution in context +/// Allow unused placeholder substitution in context. TEST(FormatTest, EmptyFmtStrPopulatedCtx) { FmtContext ctx; ctx.withBuilder("builder"); @@ -40,21 +40,21 @@ TEST(FormatTest, LiteralFmtStr) { EXPECT_THAT(result, StrEq("void foo {}")); } -// Print single dollar literally +/// Print single dollar literally. TEST(FormatTest, AdjacentDollar) { FmtContext ctx; std::string result = std::string(tgfmt("$", &ctx)); EXPECT_THAT(result, StrEq("$")); } -// Print dangling dollar literally +/// Print dangling dollar literally. TEST(FormatTest, DanglingDollar) { FmtContext ctx; std::string result = std::string(tgfmt("foo bar baz$", &ctx)); EXPECT_THAT(result, StrEq("foo bar baz$")); } -// Allow escape dollars with '$$' +/// Allow escape dollars with '$$'. TEST(FormatTest, EscapeDollars) { FmtContext ctx; std::string result = @@ -72,14 +72,14 @@ TEST(FormatTest, PositionalFmtStr) { EXPECT_THAT(result, StrEq("a b 43 d")); } -// Output the placeholder if missing substitution +/// Output the placeholder if missing substitution. TEST(FormatTest, PositionalFmtStrMissingParams) { FmtContext ctx; std::string result = std::string(tgfmt("$0 %1 $2", &ctx)); EXPECT_THAT(result, StrEq("$0 %1 $2")); } -// Allow flexible reference of positional parameters +/// Allow flexible reference of positional parameters. TEST(FormatTest, PositionalFmtStrFlexibleRef) { FmtContext ctx; std::string result = std::string(tgfmt("$2 $0 $2", &ctx, "a", "b", "c")); @@ -122,7 +122,7 @@ TEST(FormatTest, PlaceHolderMissingSubst) { EXPECT_THAT(result, StrEq("$_op")); } -// Test commonly used delimiters in C++ +/// Test commonly used delimiters in C++. 
TEST(FormatTest, PlaceHolderFmtStrDelimiter) { FmtContext ctx; ctx.addSubst("m", ""); @@ -130,7 +130,7 @@ TEST(FormatTest, PlaceHolderFmtStrDelimiter) { EXPECT_THAT(result, StrEq("{([])}|")); } -// Test allowed characters in placeholder symbol +/// Test allowed characters in placeholder symbol. TEST(FormatTest, CustomPlaceHolderFmtStrPlaceHolderChars) { FmtContext ctx; ctx.addSubst("m", "0 "); diff --git a/mlir/unittests/TableGen/StructsGenTest.cpp b/mlir/unittests/TableGen/StructsGenTest.cpp index 19aff1c83b0fe..c58fedb4ec4f0 100644 --- a/mlir/unittests/TableGen/StructsGenTest.cpp +++ b/mlir/unittests/TableGen/StructsGenTest.cpp @@ -17,12 +17,12 @@ namespace mlir { -// Pull in generated enum utility declarations +/// Pull in generated enum utility declarations and definitions. #include "StructAttrGenTest.h.inc" -// And definitions #include "StructAttrGenTest.cpp.inc" -// Helper that returns an example test::TestStruct for testing its -// implementation. + +/// Helper that returns an example test::TestStruct for testing its +/// implementation. static test::TestStruct getTestStruct(mlir::MLIRContext *context) { auto integerType = mlir::IntegerType::get(32, context); auto integerAttr = mlir::IntegerAttr::get(integerType, 127); @@ -39,16 +39,16 @@ static test::TestStruct getTestStruct(mlir::MLIRContext *context) { optionalAttr, context); } -// Validates that test::TestStruct::classof correctly identifies a valid -// test::TestStruct. +/// Validates that test::TestStruct::classof correctly identifies a valid +/// test::TestStruct. TEST(StructsGenTest, ClassofTrue) { mlir::MLIRContext context; auto structAttr = getTestStruct(&context); ASSERT_TRUE(test::TestStruct::classof(structAttr)); } -// Validates that test::TestStruct::classof fails when an extra attribute is in -// the class. +/// Validates that test::TestStruct::classof fails when an extra attribute is in +/// the class. 
TEST(StructsGenTest, ClassofExtraFalse) { mlir::MLIRContext context; mlir::DictionaryAttr structAttr = getTestStruct(&context); @@ -69,8 +69,8 @@ TEST(StructsGenTest, ClassofExtraFalse) { ASSERT_FALSE(test::TestStruct::classof(badDictionary)); } -// Validates that test::TestStruct::classof fails when a NamedAttribute has an -// incorrect name. +/// Validates that test::TestStruct::classof fails when a NamedAttribute has an +/// incorrect name. TEST(StructsGenTest, ClassofBadNameFalse) { mlir::MLIRContext context; mlir::DictionaryAttr structAttr = getTestStruct(&context); @@ -90,8 +90,8 @@ TEST(StructsGenTest, ClassofBadNameFalse) { ASSERT_FALSE(test::TestStruct::classof(badDictionary)); } -// Validates that test::TestStruct::classof fails when a NamedAttribute has an -// incorrect type. +/// Validates that test::TestStruct::classof fails when a NamedAttribute has an +/// incorrect type. TEST(StructsGenTest, ClassofBadTypeFalse) { mlir::MLIRContext context; mlir::DictionaryAttr structAttr = getTestStruct(&context); @@ -115,8 +115,8 @@ TEST(StructsGenTest, ClassofBadTypeFalse) { ASSERT_FALSE(test::TestStruct::classof(badDictionary)); } -// Validates that test::TestStruct::classof fails when a NamedAttribute is -// missing. +/// Validates that test::TestStruct::classof fails when a NamedAttribute is +/// missing. TEST(StructsGenTest, ClassofMissingFalse) { mlir::MLIRContext context; mlir::DictionaryAttr structAttr = getTestStruct(&context); @@ -132,7 +132,7 @@ TEST(StructsGenTest, ClassofMissingFalse) { ASSERT_FALSE(test::TestStruct::classof(badDictionary)); } -// Validate the accessor for the FloatAttr value. +/// Validate the accessor for the FloatAttr value. TEST(StructsGenTest, GetFloat) { mlir::MLIRContext context; auto structAttr = getTestStruct(&context); @@ -140,7 +140,7 @@ TEST(StructsGenTest, GetFloat) { EXPECT_EQ(returnedAttr.getValueAsDouble(), 0.25); } -// Validate the accessor for the IntegerAttr value. 
+/// Validate the accessor for the IntegerAttr value. TEST(StructsGenTest, GetInteger) { mlir::MLIRContext context; auto structAttr = getTestStruct(&context); @@ -148,7 +148,7 @@ TEST(StructsGenTest, GetInteger) { EXPECT_EQ(returnedAttr.getInt(), 127); } -// Validate the accessor for the ElementsAttr value. +/// Validate the accessor for the ElementsAttr value. TEST(StructsGenTest, GetElements) { mlir::MLIRContext context; auto structAttr = getTestStruct(&context); From be9f363704a802b10b30d853f1bb6571e5ebed94 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 12 Jul 2020 08:14:52 -0700 Subject: [PATCH 041/771] [AVRInstPrinter] printOperand: support llvm-objdump --print-imm-hex Differential Revision: https://reviews.llvm.org/D83634 --- llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp | 2 +- llvm/test/MC/AVR/hex-immediates.s | 7 +++++++ 2 files changed, 8 insertions(+), 1 deletion(-) create mode 100644 llvm/test/MC/AVR/hex-immediates.s diff --git a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp index 815a309a8caef..42fac5e2e000e 100644 --- a/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp +++ b/llvm/lib/Target/AVR/MCTargetDesc/AVRInstPrinter.cpp @@ -131,7 +131,7 @@ void AVRInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, O << getPrettyRegisterName(Op.getReg(), MRI); } } else if (Op.isImm()) { - O << Op.getImm(); + O << formatImm(Op.getImm()); } else { assert(Op.isExpr() && "Unknown operand kind in printOperand"); O << *Op.getExpr(); diff --git a/llvm/test/MC/AVR/hex-immediates.s b/llvm/test/MC/AVR/hex-immediates.s new file mode 100644 index 0000000000000..ca4c8b9f33551 --- /dev/null +++ b/llvm/test/MC/AVR/hex-immediates.s @@ -0,0 +1,7 @@ +; RUN: llvm-mc -filetype=obj -triple=avr %s -o %t +; RUN: llvm-objdump -d %t | FileCheck %s --check-prefix=DEC +; RUN: llvm-objdump -d --print-imm-hex %t | FileCheck %s --check-prefix=HEX + +; DEC: ldi r24, 66 +; HEX: ldi r24, 0x42 + ldi r24, 0x42 From 
d589372704fc7da0c143cbfe27f930a9d7dd333b Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sun, 12 Jul 2020 17:48:05 +0200 Subject: [PATCH 042/771] [SCCP] Extend nonnull metadata test (NFC) --- llvm/test/Transforms/SCCP/metadata.ll | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/llvm/test/Transforms/SCCP/metadata.ll b/llvm/test/Transforms/SCCP/metadata.ll index 43e4c59571e9a..844e2103ae318 100644 --- a/llvm/test/Transforms/SCCP/metadata.ll +++ b/llvm/test/Transforms/SCCP/metadata.ll @@ -44,16 +44,39 @@ define i32 @load_range_single_volatile(i32* %p) { ret i32 %v } -define void @load_nonnull(i32** %p) { +define void @load_nonnull(i32** %p, i32** %p2) { ; CHECK-LABEL: @load_nonnull( ; CHECK-NEXT: [[V:%.*]] = load i32*, i32** [[P:%.*]], align 8, !nonnull !2 +; CHECK-NEXT: [[V2:%.*]] = load i32*, i32** [[P2:%.*]], align 8, !nonnull !2 ; CHECK-NEXT: [[C1:%.*]] = icmp ne i32* [[V]], null ; CHECK-NEXT: call void @use(i1 [[C1]]) +; CHECK-NEXT: [[C2:%.*]] = icmp eq i32* [[V]], null +; CHECK-NEXT: call void @use(i1 [[C2]]) +; CHECK-NEXT: [[C3:%.*]] = icmp ne i32* null, [[V]] +; CHECK-NEXT: call void @use(i1 [[C3]]) +; CHECK-NEXT: [[C4:%.*]] = icmp eq i32* null, [[V]] +; CHECK-NEXT: call void @use(i1 [[C4]]) +; CHECK-NEXT: [[C5:%.*]] = icmp eq i32* [[V]], [[V2]] +; CHECK-NEXT: call void @use(i1 [[C5]]) +; CHECK-NEXT: [[C6:%.*]] = icmp ne i32* [[V]], [[V2]] +; CHECK-NEXT: call void @use(i1 [[C6]]) ; CHECK-NEXT: ret void ; %v = load i32*, i32** %p, !nonnull !{} + %v2 = load i32*, i32** %p2, !nonnull !{} %c1 = icmp ne i32* %v, null call void @use(i1 %c1) + %c2 = icmp eq i32* %v, null + call void @use(i1 %c2) + %c3 = icmp ne i32* null, %v + call void @use(i1 %c3) + %c4 = icmp eq i32* null, %v + call void @use(i1 %c4) + ; There is no particular relationship between two nonnull values. 
+ %c5 = icmp eq i32* %v, %v2 + call void @use(i1 %c5) + %c6 = icmp ne i32* %v, %v2 + call void @use(i1 %c6) ret void } From 69e60c9dc76653c10c4e8f7af1743307532102eb Mon Sep 17 00:00:00 2001 From: Ayke van Laethem Date: Thu, 25 Jun 2020 13:57:58 +0200 Subject: [PATCH 043/771] [LLD][ELF][AVR] Implement the missing relocation types Implements the missing relocation types for AVR target. The results have been cross-checked with binutils. Original patch by LemonBoy. Some changes by me. Differential Revision: https://reviews.llvm.org/D78741 --- lld/ELF/Arch/AVR.cpp | 122 ++++++++++++++++++++++++++++++++++++++- lld/test/ELF/avr-reloc.s | 84 +++++++++++++++++++++++++++ 2 files changed, 205 insertions(+), 1 deletion(-) create mode 100644 lld/test/ELF/avr-reloc.s diff --git a/lld/ELF/Arch/AVR.cpp b/lld/ELF/Arch/AVR.cpp index 9b733837dd5d0..4513a970b32d7 100644 --- a/lld/ELF/Arch/AVR.cpp +++ b/lld/ELF/Arch/AVR.cpp @@ -54,11 +54,131 @@ AVR::AVR() { noneRel = R_AVR_NONE; } RelExpr AVR::getRelExpr(RelType type, const Symbol &s, const uint8_t *loc) const { - return R_ABS; + switch (type) { + case R_AVR_7_PCREL: + case R_AVR_13_PCREL: + return R_PC; + default: + return R_ABS; + } +} + +static void writeLDI(uint8_t *loc, uint64_t val) { + write16le(loc, (read16le(loc) & 0xf0f0) | (val & 0xf0) << 4 | (val & 0x0f)); } void AVR::relocate(uint8_t *loc, const Relocation &rel, uint64_t val) const { switch (rel.type) { + case R_AVR_8: + checkUInt(loc, val, 8, rel); + *loc = val; + break; + case R_AVR_16: + // Note: this relocation is often used between code and data space, which + // are 0x800000 apart in the output ELF file. The bitmask cuts off the high + // bit. 
+ write16le(loc, val & 0xffff); + break; + case R_AVR_16_PM: + checkAlignment(loc, val, 2, rel); + checkUInt(loc, val >> 1, 16, rel); + write16le(loc, val >> 1); + break; + case R_AVR_32: + checkUInt(loc, val, 32, rel); + write32le(loc, val); + break; + + case R_AVR_LDI: + checkUInt(loc, val, 8, rel); + writeLDI(loc, val & 0xff); + break; + + case R_AVR_LO8_LDI_NEG: + writeLDI(loc, -val & 0xff); + break; + case R_AVR_LO8_LDI: + writeLDI(loc, val & 0xff); + break; + case R_AVR_HI8_LDI_NEG: + writeLDI(loc, (-val >> 8) & 0xff); + break; + case R_AVR_HI8_LDI: + writeLDI(loc, (val >> 8) & 0xff); + break; + case R_AVR_HH8_LDI_NEG: + writeLDI(loc, (-val >> 16) & 0xff); + break; + case R_AVR_HH8_LDI: + writeLDI(loc, (val >> 16) & 0xff); + break; + case R_AVR_MS8_LDI_NEG: + writeLDI(loc, (-val >> 24) & 0xff); + break; + case R_AVR_MS8_LDI: + writeLDI(loc, (val >> 24) & 0xff); + break; + + case R_AVR_LO8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 1) & 0xff); + break; + case R_AVR_HI8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 9) & 0xff); + break; + case R_AVR_HH8_LDI_PM: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (val >> 17) & 0xff); + break; + + case R_AVR_LO8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 1) & 0xff); + break; + case R_AVR_HI8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 9) & 0xff); + break; + case R_AVR_HH8_LDI_PM_NEG: + checkAlignment(loc, val, 2, rel); + writeLDI(loc, (-val >> 17) & 0xff); + break; + + case R_AVR_PORT5: + checkUInt(loc, val, 5, rel); + write16le(loc, (read16le(loc) & 0xff07) | (val << 3)); + break; + case R_AVR_PORT6: + checkUInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xf9f0) | (val & 0x30) << 5 | (val & 0x0f)); + break; + + // Since every jump destination is word aligned we gain an extra bit + case R_AVR_7_PCREL: { + checkInt(loc, val, 7, rel); + checkAlignment(loc, val, 2, rel); + const uint16_t target = (val - 
2) >> 1; + write16le(loc, (read16le(loc) & 0xfc07) | ((target & 0x7f) << 3)); + break; + } + case R_AVR_13_PCREL: { + checkAlignment(loc, val, 2, rel); + const uint16_t target = (val - 2) >> 1; + write16le(loc, (read16le(loc) & 0xf000) | (target & 0xfff)); + break; + } + + case R_AVR_6: + checkInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xd3f8) | (val & 0x20) << 8 | + (val & 0x18) << 7 | (val & 0x07)); + break; + case R_AVR_6_ADIW: + checkInt(loc, val, 6, rel); + write16le(loc, (read16le(loc) & 0xff30) | (val & 0x30) << 2 | (val & 0x0F)); + break; + case R_AVR_CALL: { uint16_t hi = val >> 17; uint16_t lo = val >> 1; diff --git a/lld/test/ELF/avr-reloc.s b/lld/test/ELF/avr-reloc.s new file mode 100644 index 0000000000000..49f78044068b9 --- /dev/null +++ b/lld/test/ELF/avr-reloc.s @@ -0,0 +1,84 @@ +; REQUIRES: avr +; RUN: llvm-mc -filetype=obj -triple=avr -mcpu=atmega328p %s -o %t.o +; RUN: ld.lld %t.o --defsym=a=0x12345678 --defsym=b=30 -o %t +; RUN: llvm-objdump -d --print-imm-hex %t | FileCheck %s +; RUN: llvm-objdump -s %t | FileCheck --check-prefix=HEX %s + +.section .LDI,"ax",@progbits +; CHECK-LABEL: section .LDI: +; CHECK: ldi r20, 0x78 +; CHECK-NEXT: ldi r20, 0x56 +; CHECK-NEXT: ldi r20, 0x34 +; CHECK-NEXT: ldi r20, 0x12 +; CHECK-NEXT: ldi r20, 0x3c +; CHECK-NEXT: ldi r20, 0x2b +; CHECK-NEXT: ldi r20, 0x1a +; CHECK-NEXT: ldi r20, 0xff +ldi r20, lo8(a) ; R_AVR_LO8_LDI +ldi r20, hi8(a) ; R_AVR_HI8_LDI +ldi r20, hh8(a) ; R_AVR_HH8_LDI +ldi r20, hhi8(a) ; R_AVR_MS8_LDI + +ldi r20, pm_lo8(a) ; R_AVR_LO8_LDI_PM +ldi r20, pm_hi8(a) ; R_AVR_HI8_LDI_PM +ldi r20, pm_hh8(a) ; R_AVR_HH8_LDI_PM + +ldi r20, b+225 + +.section .LDI_NEG,"ax",@progbits +; CHECK-LABEL: section .LDI_NEG: +; CHECK: ldi r20, 0x88 +; CHECK-NEXT: ldi r20, 0xa9 +; CHECK-NEXT: ldi r20, 0xcb +; CHECK-NEXT: ldi r20, 0xed +; CHECK-NEXT: ldi r20, 0xc4 +; CHECK-NEXT: ldi r20, 0xd4 +; CHECK-NEXT: ldi r20, 0xe5 +ldi r20, lo8(-(a)) ; R_AVR_LO8_LDI_NEG +ldi r20, hi8(-(a)) ; R_AVR_HI8_LDI_NEG +ldi 
r20, hh8(-(a)) ; R_AVR_HH8_LDI_NEG +ldi r20, hhi8(-(a)) ; R_AVR_MS8_LDI_NEG + +ldi r20, pm_lo8(-(a)) ; R_AVR_LO8_LDI_PM_NEG +ldi r20, pm_hi8(-(a)) ; R_AVR_HI8_LDI_PM_NEG +ldi r20, pm_hh8(-(a)) ; R_AVR_HH8_LDI_PM_NEG + +;; The disassembler is not yet able to decode those opcodes +;; 9e 8e std Y+30, r9 +;; 9e 8c ldd r9, Y+30 +;; 4e 96 adiw r24, 0x1e +.section .SIX,"ax",@progbits +; HEX-LABEL: section .SIX: +; HEX-NEXT: 9e8e9e8c 4e96 +std Y+b, r9 ; R_AVR_6 +ldd r9, Y+b ; R_AVR_6 +adiw r24, b ; R_AVR_6_ADIW + +.section .PORT,"ax",@progbits +; CHECK-LABEL: section .PORT: +; CHECK: in r20, 0x1e +; CHECK-NEXT: sbic 0x1e, 0x1 +in r20, b ; R_AVR_PORT6 +sbic b, 1 ; R_AVR_PORT5 + +;; The disassembler is not yet able to decode those opcodes +;; 0f c0 rjmp .+30 +;; ee cf rjmp .-36 +;; 69 f0 breq .+26 +;; 61 f3 breq .-40 +.section .PCREL,"ax",@progbits +; HEX-LABEL: section .PCREL: +; HEX-NEXT: 0fc0eecf 69f061f3 +foo: +rjmp foo + 32 ; R_AVR_13_PCREL +rjmp foo - 32 ; R_AVR_13_PCREL +breq foo + 32 ; R_AVR_7_PCREL +breq foo - 32 ; R_AVR_7_PCREL + +.section .DATA,"ax",@progbits +; HEX-LABEL: section .DATA: +; HEX-NEXT: {{.*}} 1e1e000f 00785634 12 +.byte b ; R_AVR_8 +.short b ; R_AVR_16 +.short gs(b) ; R_AVR_16_PM +.long a ; R_AVR_32 From 152a9fef1b3b44f2c224cb8096b3d649279f2578 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Sat, 11 Jul 2020 19:08:21 -0700 Subject: [PATCH 044/771] BPF: permit .maps section variables with typedef type Currently, llvm when see a global variable in .maps section, it ensures its type must be a struct type. Then pointee will be further evaluated for the structure members. In normal cases, the pointee type will be skipped. Although this is what current all bpf programs are doing, but it is a little bit restrictive. 
For example, it is legitimate for users to have: typedef struct { int key_size; int value_size; } __map_t; __map_t map __attribute__((section(".maps"))); This patch lifts this restriction and typedef of a struct type is also allowed for .maps section variables. To avoid create unnecessary fixup entries when traversal started with typedef/struct type, the new implementation first traverse all map struct members and then traverse the typedef/struct type. This way, in internal BTFDebug implementation, no fixup entries are generated. Two new unit tests are added for typedef and const struct in .maps section. Also tested with kernel bpf selftests. Differential Revision: https://reviews.llvm.org/D83638 --- llvm/lib/Target/BPF/BTFDebug.cpp | 32 +++++---- llvm/test/CodeGen/BPF/BTF/map-def-2.ll | 90 ++++++++++++++++++++++++++ llvm/test/CodeGen/BPF/BTF/map-def-3.ll | 65 +++++++++++++++++++ llvm/test/CodeGen/BPF/BTF/map-def.ll | 58 ++++++++--------- 4 files changed, 199 insertions(+), 46 deletions(-) create mode 100644 llvm/test/CodeGen/BPF/BTF/map-def-2.ll create mode 100644 llvm/test/CodeGen/BPF/BTF/map-def-3.ll diff --git a/llvm/lib/Target/BPF/BTFDebug.cpp b/llvm/lib/Target/BPF/BTFDebug.cpp index 6ada75adba969..4510e93574892 100644 --- a/llvm/lib/Target/BPF/BTFDebug.cpp +++ b/llvm/lib/Target/BPF/BTFDebug.cpp @@ -664,7 +664,17 @@ void BTFDebug::visitMapDefType(const DIType *Ty, uint32_t &TypeId) { return; } - // MapDef type is a struct type + // MapDef type may be a struct type or a non-pointer derived type + const DIType *OrigTy = Ty; + while (auto *DTy = dyn_cast(Ty)) { + auto Tag = DTy->getTag(); + if (Tag != dwarf::DW_TAG_typedef && Tag != dwarf::DW_TAG_const_type && + Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type) + break; + Ty = DTy->getBaseType(); + } + const auto *CTy = dyn_cast(Ty); if (!CTy) return; @@ -673,27 +683,15 @@ void BTFDebug::visitMapDefType(const DIType *Ty, uint32_t &TypeId) { if (Tag != dwarf::DW_TAG_structure_type || 
CTy->isForwardDecl()) return; - // Record this type + // Visit all struct members to ensure pointee type is visited const DINodeArray Elements = CTy->getElements(); - bool HasBitField = false; - for (const auto *Element : Elements) { - auto E = cast(Element); - if (E->isBitField()) { - HasBitField = true; - break; - } - } - - auto TypeEntry = - std::make_unique(CTy, true, HasBitField, Elements.size()); - StructTypes.push_back(TypeEntry.get()); - TypeId = addType(std::move(TypeEntry), CTy); - - // Visit all struct members for (const auto *Element : Elements) { const auto *MemberType = cast(Element); visitTypeEntry(MemberType->getBaseType()); } + + // Visit this type, struct or a const/typedef/volatile/restrict type + visitTypeEntry(OrigTy, TypeId, false, false); } /// Read file contents from the actual file or from the source diff --git a/llvm/test/CodeGen/BPF/BTF/map-def-2.ll b/llvm/test/CodeGen/BPF/BTF/map-def-2.ll new file mode 100644 index 0000000000000..bf3c4a7961fbf --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/map-def-2.ll @@ -0,0 +1,90 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; +; Source code: +; struct key_type { +; int a1; +; }; +; typedef struct map_type { +; struct key_type *key; +; } _map_type; +; typedef _map_type __map_type; +; __map_type __attribute__((section(".maps"))) hash_map; +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm t2.c + +%struct.map_type = type { %struct.key_type* } +%struct.key_type = type { i32 } + +@hash_map = dso_local local_unnamed_addr global %struct.map_type zeroinitializer, section ".maps", align 8, !dbg !0 + +; CHECK: .long 0 # BTF_KIND_PTR(id = 1) +; CHECK-NEXT: .long 33554432 # 0x2000000 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 1 # BTF_KIND_STRUCT(id = 2) +; CHECK-NEXT: .long 67108865 # 0x4000001 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 10 +; CHECK-NEXT: .long 3 +; 
CHECK-NEXT: .long 0 # 0x0 +; CHECK-NEXT: .long 13 # BTF_KIND_INT(id = 3) +; CHECK-NEXT: .long 16777216 # 0x1000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 16777248 # 0x1000020 +; CHECK-NEXT: .long 17 # BTF_KIND_TYPEDEF(id = 4) +; CHECK-NEXT: .long 134217728 # 0x8000000 +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long 28 # BTF_KIND_TYPEDEF(id = 5) +; CHECK-NEXT: .long 134217728 # 0x8000000 +; CHECK-NEXT: .long 6 +; CHECK-NEXT: .long 38 # BTF_KIND_STRUCT(id = 6) +; CHECK-NEXT: .long 67108865 # 0x4000001 +; CHECK-NEXT: .long 8 +; CHECK-NEXT: .long 47 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 0 # 0x0 +; CHECK-NEXT: .long 51 # BTF_KIND_VAR(id = 7) +; CHECK-NEXT: .long 234881024 # 0xe000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 60 # BTF_KIND_DATASEC(id = 8) +; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 7 +; CHECK-NEXT: .long hash_map +; CHECK-NEXT: .long 8 + +; CHECK: .ascii "key_type" # string offset=1 +; CHECK: .ascii "a1" # string offset=10 +; CHECK: .ascii "int" # string offset=13 +; CHECK: .ascii "__map_type" # string offset=17 +; CHECK: .ascii "_map_type" # string offset=28 +; CHECK: .ascii "map_type" # string offset=38 +; CHECK: .ascii "key" # string offset=47 +; CHECK: .ascii "hash_map" # string offset=51 +; CHECK: .ascii ".maps" # string offset=60 + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!16, !17, !18} +!llvm.ident = !{!19} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "hash_map", scope: !2, file: !3, line: 8, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git b8409c03ed90807f3d49c7d98dceea98cf461f7a)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "t2.c", directory: 
"/tmp/home/yhs/tmp1") +!4 = !{} +!5 = !{!0} +!6 = !DIDerivedType(tag: DW_TAG_typedef, name: "__map_type", file: !3, line: 7, baseType: !7) +!7 = !DIDerivedType(tag: DW_TAG_typedef, name: "_map_type", file: !3, line: 6, baseType: !8) +!8 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "map_type", file: !3, line: 4, size: 64, elements: !9) +!9 = !{!10} +!10 = !DIDerivedType(tag: DW_TAG_member, name: "key", scope: !8, file: !3, line: 5, baseType: !11, size: 64) +!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !12, size: 64) +!12 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "key_type", file: !3, line: 1, size: 32, elements: !13) +!13 = !{!14} +!14 = !DIDerivedType(tag: DW_TAG_member, name: "a1", scope: !12, file: !3, line: 2, baseType: !15, size: 32) +!15 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!16 = !{i32 7, !"Dwarf Version", i32 4} +!17 = !{i32 2, !"Debug Info Version", i32 3} +!18 = !{i32 1, !"wchar_size", i32 4} +!19 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git b8409c03ed90807f3d49c7d98dceea98cf461f7a)"} diff --git a/llvm/test/CodeGen/BPF/BTF/map-def-3.ll b/llvm/test/CodeGen/BPF/BTF/map-def-3.ll new file mode 100644 index 0000000000000..e05470782ec26 --- /dev/null +++ b/llvm/test/CodeGen/BPF/BTF/map-def-3.ll @@ -0,0 +1,65 @@ +; RUN: llc -march=bpfel -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; RUN: llc -march=bpfeb -filetype=asm -o - %s | FileCheck -check-prefixes=CHECK %s +; +; Source code: +; struct key_type { +; int a1; +; }; +; const struct key_type __attribute__((section(".maps"))) hash_map; +; Compilation flag: +; clang -target bpf -O2 -g -S -emit-llvm t3.c + +%struct.key_type = type { i32 } + +@hash_map = dso_local local_unnamed_addr constant %struct.key_type zeroinitializer, section ".maps", align 4, !dbg !0 + +; CHECK: .long 1 # BTF_KIND_INT(id = 1) +; CHECK-NEXT: .long 16777216 # 0x1000000 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 16777248 # 
0x1000020 +; CHECK-NEXT: .long 0 # BTF_KIND_CONST(id = 2) +; CHECK-NEXT: .long 167772160 # 0xa000000 +; CHECK-NEXT: .long 3 +; CHECK-NEXT: .long 5 # BTF_KIND_STRUCT(id = 3) +; CHECK-NEXT: .long 67108865 # 0x4000001 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 14 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 0 # 0x0 +; CHECK-NEXT: .long 17 # BTF_KIND_VAR(id = 4) +; CHECK-NEXT: .long 234881024 # 0xe000000 +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 26 # BTF_KIND_DATASEC(id = 5) +; CHECK-NEXT: .long 251658241 # 0xf000001 +; CHECK-NEXT: .long 0 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long hash_map +; CHECK-NEXT: .long 4 + +; CHECK: .ascii "int" # string offset=1 +; CHECK: .ascii "key_type" # string offset=5 +; CHECK: .ascii "a1" # string offset=14 +; CHECK: .ascii "hash_map" # string offset=17 +; CHECK: .ascii ".maps" # string offset=26 + + +!llvm.dbg.cu = !{!2} +!llvm.module.flags = !{!11, !12, !13} +!llvm.ident = !{!14} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "hash_map", scope: !2, file: !3, line: 4, type: !6, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang version 11.0.0 (https://github.com/llvm/llvm-project.git 5bd074629f00d4798674b411cf00216f38016483)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, splitDebugInlining: false, nameTableKind: None) +!3 = !DIFile(filename: "t3.c", directory: "/tmp/home/yhs/tmp1") +!4 = !{} +!5 = !{!0} +!6 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !7) +!7 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "key_type", file: !3, line: 1, size: 32, elements: !8) +!8 = !{!9} +!9 = !DIDerivedType(tag: DW_TAG_member, name: "a1", scope: !7, file: !3, line: 2, baseType: !10, size: 32) +!10 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!11 = !{i32 7, !"Dwarf Version", i32 4} +!12 = !{i32 2, !"Debug Info Version", 
i32 3} +!13 = !{i32 1, !"wchar_size", i32 4} +!14 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git 5bd074629f00d4798674b411cf00216f38016483)"} diff --git a/llvm/test/CodeGen/BPF/BTF/map-def.ll b/llvm/test/CodeGen/BPF/BTF/map-def.ll index cf777880efa17..e12cde3ef98ae 100644 --- a/llvm/test/CodeGen/BPF/BTF/map-def.ll +++ b/llvm/test/CodeGen/BPF/BTF/map-def.ll @@ -28,41 +28,41 @@ ; CHECK-NEXT: .long 168 ; CHECK-NEXT: .long 168 ; CHECK-NEXT: .long 65 -; CHECK-NEXT: .long 1 # BTF_KIND_STRUCT(id = 1) -; CHECK-NEXT: .long 67108866 # 0x4000002 -; CHECK-NEXT: .long 16 -; CHECK-NEXT: .long 10 -; CHECK-NEXT: .long 2 -; CHECK-NEXT: .long 0 # 0x0 -; CHECK-NEXT: .long 14 -; CHECK-NEXT: .long 5 -; CHECK-NEXT: .long 64 # 0x40 -; CHECK-NEXT: .long 0 # BTF_KIND_PTR(id = 2) +; CHECK-NEXT: .long 0 # BTF_KIND_PTR(id = 1) ; CHECK-NEXT: .long 33554432 # 0x2000000 -; CHECK-NEXT: .long 3 -; CHECK-NEXT: .long 20 # BTF_KIND_STRUCT(id = 3) +; CHECK-NEXT: .long 2 +; CHECK-NEXT: .long 1 # BTF_KIND_STRUCT(id = 2) ; CHECK-NEXT: .long 67108866 # 0x4000002 ; CHECK-NEXT: .long 8 -; CHECK-NEXT: .long 29 -; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 10 +; CHECK-NEXT: .long 3 ; CHECK-NEXT: .long 0 # 0x0 -; CHECK-NEXT: .long 31 -; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 12 +; CHECK-NEXT: .long 3 ; CHECK-NEXT: .long 32 # 0x20 -; CHECK-NEXT: .long 33 # BTF_KIND_INT(id = 4) +; CHECK-NEXT: .long 14 # BTF_KIND_INT(id = 3) ; CHECK-NEXT: .long 16777216 # 0x1000000 ; CHECK-NEXT: .long 4 ; CHECK-NEXT: .long 16777248 # 0x1000020 -; CHECK-NEXT: .long 0 # BTF_KIND_PTR(id = 5) +; CHECK-NEXT: .long 0 # BTF_KIND_PTR(id = 4) ; CHECK-NEXT: .long 33554432 # 0x2000000 -; CHECK-NEXT: .long 6 -; CHECK-NEXT: .long 37 # BTF_KIND_INT(id = 6) +; CHECK-NEXT: .long 5 +; CHECK-NEXT: .long 18 # BTF_KIND_INT(id = 5) ; CHECK-NEXT: .long 16777216 # 0x1000000 ; CHECK-NEXT: .long 4 ; CHECK-NEXT: .long 32 # 0x20 +; CHECK-NEXT: .long 31 # BTF_KIND_STRUCT(id = 6) +; CHECK-NEXT: .long 67108866 # 0x4000002 +; CHECK-NEXT: 
.long 16 +; CHECK-NEXT: .long 40 +; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 0 # 0x0 +; CHECK-NEXT: .long 44 +; CHECK-NEXT: .long 4 +; CHECK-NEXT: .long 64 # 0x40 ; CHECK-NEXT: .long 50 # BTF_KIND_VAR(id = 7) ; CHECK-NEXT: .long 234881024 # 0xe000000 -; CHECK-NEXT: .long 1 +; CHECK-NEXT: .long 6 ; CHECK-NEXT: .long 1 ; CHECK-NEXT: .long 59 # BTF_KIND_DATASEC(id = 8) ; CHECK-NEXT: .long 251658241 # 0xf000001 @@ -71,21 +71,21 @@ ; CHECK-NEXT: .long hash_map ; CHECK-NEXT: .long 16 ; CHECK-NEXT: .byte 0 # string offset=0 -; CHECK-NEXT: .ascii "map_type" # string offset=1 +; CHECK-NEXT: .ascii "key_type" # string offset=1 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "key" # string offset=10 +; CHECK-NEXT: .byte 97 # string offset=10 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "value" # string offset=14 +; CHECK-NEXT: .byte 98 # string offset=12 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "key_type" # string offset=20 +; CHECK-NEXT: .ascii "int" # string offset=14 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .byte 97 # string offset=29 +; CHECK-NEXT: .ascii "unsigned int" # string offset=18 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .byte 98 # string offset=31 +; CHECK-NEXT: .ascii "map_type" # string offset=31 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "int" # string offset=33 +; CHECK-NEXT: .ascii "key" # string offset=40 ; CHECK-NEXT: .byte 0 -; CHECK-NEXT: .ascii "unsigned int" # string offset=37 +; CHECK-NEXT: .ascii "value" # string offset=44 ; CHECK-NEXT: .byte 0 ; CHECK-NEXT: .ascii "hash_map" # string offset=50 ; CHECK-NEXT: .byte 0 From 82a5157ff1650e3366f7a9c619269766ad1d5e93 Mon Sep 17 00:00:00 2001 From: Ayal Zaks Date: Thu, 9 Jul 2020 12:57:45 +0300 Subject: [PATCH 045/771] [LV] Fixing versioning-for-unit-stide of loops with small trip count This patch fixes D81345 and PR46652. If a loop with a small trip count is compiled w/o -Os/-Oz, Loop Access Analysis still generates runtime checks for unit strides that will version the loop. 
In such cases, the loop vectorizer should either re-run the analysis or bail-out from vectorizing the loop, as done prior to D81345. The latter is applied for now as the former requires refactoring. Differential Revision: https://reviews.llvm.org/D83470 --- .../Transforms/Vectorize/LoopVectorize.cpp | 10 +++++-- llvm/test/Transforms/LoopVectorize/optsize.ll | 26 +++++++++++++++++++ 2 files changed, 34 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 10e690d56ffd1..35af8e4257789 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4949,8 +4949,14 @@ bool LoopVectorizationCostModel::runtimeChecksRequired() { return true; } - assert(Legal->getLAI()->getSymbolicStrides().empty() && - "Specializing for stride == 1 under -Os/-Oz"); + // FIXME: Avoid specializing for stride==1 instead of bailing out. + if (!Legal->getLAI()->getSymbolicStrides().empty()) { + reportVectorizationFailure("Runtime stride check for small trip count", + "runtime stride == 1 checks needed. Enable vectorization of " + "this loop without such check by compiling with -Os/-Oz", + "CantVersionLoopWithOptForSize", ORE, TheLoop); + return true; + } return false; } diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index 8def1ab0a0e83..0e88f362746fb 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -221,6 +221,32 @@ for.end: ret void } +; PR46652: Check that the need for stride==1 check prevents vectorizing a loop +; having tiny trip count, when compiling w/o -Os/-Oz. 
+; CHECK-LABEL: @pr46652 +; CHECK-NOT: vector.scevcheck +; CHECK-NOT: vector.body +; CHECK-LABEL: for.body + +@g = external global [1 x i16], align 1 + +define void @pr46652(i16 %stride) { +entry: + br label %for.body + +for.body: ; preds = %for.body, %entry + %l1.02 = phi i16 [ 1, %entry ], [ %inc9, %for.body ] + %mul = mul nsw i16 %l1.02, %stride + %arrayidx6 = getelementptr inbounds [1 x i16], [1 x i16]* @g, i16 0, i16 %mul + %0 = load i16, i16* %arrayidx6, align 1 + %inc9 = add nuw nsw i16 %l1.02, 1 + %exitcond.not = icmp eq i16 %inc9, 16 + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: ; preds = %for.body + ret void +} + !llvm.module.flags = !{!0} !0 = !{i32 1, !"ProfileSummary", !1} !1 = !{!2, !3, !4, !5, !6, !7, !8, !9} From 65dc97b79eb1979c54e7e17c411ea5f58f8dcc9c Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Sun, 12 Jul 2020 18:43:14 +0100 Subject: [PATCH 046/771] [clang-format] PR46609 clang-format does not obey `PointerAlignment: Right` for ellipsis in declarator for pack Summary: https://bugs.llvm.org/show_bug.cgi?id=46609 Ensure `*...` obey they left/middle/right rules of Pointer alignment Reviewed By: curdeius Differential Revision: https://reviews.llvm.org/D83564 --- clang/lib/Format/TokenAnnotator.cpp | 5 +++++ clang/unittests/Format/FormatTest.cpp | 17 ++++++++++++----- 2 files changed, 17 insertions(+), 5 deletions(-) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index a74015d3b4dc3..7f8e351265127 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -2844,6 +2844,11 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, Left.Previous && !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon, tok::l_square)); + // Ensure right pointer alignement with ellipsis e.g. 
int *...P + if (Left.is(tok::ellipsis) && Left.Previous && + Left.Previous->isOneOf(tok::star, tok::amp, tok::ampamp)) + return Style.PointerAlignment != FormatStyle::PAS_Right; + if (Right.is(tok::star) && Left.is(tok::l_paren)) return false; if (Left.is(tok::star) && Right.isOneOf(tok::star, tok::amp, tok::ampamp)) diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index ff9a64e81d5b5..6ac3ffbffd1c8 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -5325,7 +5325,7 @@ TEST_F(FormatTest, DeductionGuides) { verifyFormat("template S(Ts...) -> S;"); verifyFormat( "template \n" - "array(T &&... t) -> array, sizeof...(T)>;"); + "array(T &&...t) -> array, sizeof...(T)>;"); verifyFormat("template A() -> Afoo<3>())>;"); verifyFormat("template A() -> A>)>;"); verifyFormat("template A() -> Afoo<1>)>;"); @@ -8179,13 +8179,20 @@ TEST_F(FormatTest, AttributePenaltyBreaking) { } TEST_F(FormatTest, UnderstandsEllipsis) { + FormatStyle Style = getLLVMStyle(); verifyFormat("int printf(const char *fmt, ...);"); verifyFormat("template void Foo(Ts... ts) { Foo(ts...); }"); - verifyFormat("template void Foo(Ts *... ts) {}"); + verifyFormat("template void Foo(Ts *...ts) {}"); + + verifyFormat("template a;", Style); + + Style.PointerAlignment = FormatStyle::PAS_Left; + verifyFormat("template void Foo(Ts*... ts) {}", Style); + + verifyFormat("template a;", Style); - FormatStyle PointersLeft = getLLVMStyle(); - PointersLeft.PointerAlignment = FormatStyle::PAS_Left; - verifyFormat("template void Foo(Ts*... 
ts) {}", PointersLeft); + Style.PointerAlignment = FormatStyle::PAS_Middle; + verifyFormat("template a;", Style); } TEST_F(FormatTest, AdaptivelyFormatsPointersAndReferences) { From 04013a07ac3b67eb176ddfd1ddaeda41415c038f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 12 Jul 2020 10:29:45 -0700 Subject: [PATCH 047/771] [X86] Fix two places that appear to misuse peekThroughOneUseBitcasts peekThroughOneUseBitcasts checks the use count of the operand of the bitcast. Not the bitcast itself. So I think that means we need to do any outside hasOneUse checks before calling the function not after. I was working on another patch where I misused the function and did a very quick audit to see if there were other similar mistakes. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D83598 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 721b262aa433b..7657125e1e5ad 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -36464,9 +36464,9 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG, (V.getOpcode() == X86ISD::PSHUFLW || V.getOpcode() == X86ISD::PSHUFHW) && V.getOpcode() != N.getOpcode() && - V.hasOneUse()) { + V.hasOneUse() && V.getOperand(0).hasOneUse()) { SDValue D = peekThroughOneUseBitcasts(V.getOperand(0)); - if (D.getOpcode() == X86ISD::PSHUFD && D.hasOneUse()) { + if (D.getOpcode() == X86ISD::PSHUFD) { SmallVector VMask = getPSHUFShuffleMask(V); SmallVector DMask = getPSHUFShuffleMask(D); int NOffset = N.getOpcode() == X86ISD::PSHUFLW ? 0 : 4; @@ -36903,10 +36903,11 @@ static SDValue combineShuffle(SDNode *N, SelectionDAG &DAG, // insert into a zero vector. This helps get VZEXT_MOVL closer to // scalar_to_vectors where 256/512 are canonicalized to an insert and a // 128-bit scalar_to_vector.
This reduces the number of isel patterns. - if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps()) { + if (N->getOpcode() == X86ISD::VZEXT_MOVL && !DCI.isBeforeLegalizeOps() && + N->getOperand(0).hasOneUse()) { SDValue V = peekThroughOneUseBitcasts(N->getOperand(0)); - if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.hasOneUse() && + if (V.getOpcode() == ISD::INSERT_SUBVECTOR && V.getOperand(0).isUndef() && isNullConstant(V.getOperand(2))) { SDValue In = V.getOperand(1); MVT SubVT = From f8f007e378e1ed84fadf281f05166a4463a79316 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 12 Jul 2020 10:30:27 -0700 Subject: [PATCH 048/771] [X86] Consistently use 128 as the PSHUFB/VPPERM index for zero Bit 7 of the index controls zeroing, the other bits are ignored when bit 7 is set. Shuffle lowering was using 128 and shuffle combining was using 255. Seems like we should be consistent. This patch changes shuffle combining to use 128 to match lowering. Reviewed By: RKSimon Differential Revision: https://reviews.llvm.org/D83587 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++-- llvm/test/CodeGen/X86/vector-trunc.ll | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 7657125e1e5ad..450927aaf5cc7 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -35043,7 +35043,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, continue; } if (M == SM_SentinelZero) { - PSHUFBMask.push_back(DAG.getConstant(255, DL, MVT::i8)); + PSHUFBMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); continue; } M = Ratio * M + i % Ratio; @@ -35074,7 +35074,7 @@ static SDValue combineX86ShuffleChain(ArrayRef Inputs, SDValue Root, continue; } if (M == SM_SentinelZero) { - VPPERMMask.push_back(DAG.getConstant(128, DL, MVT::i8)); + VPPERMMask.push_back(DAG.getConstant(0x80, DL, MVT::i8)); continue; } M = Ratio * M + i 
% Ratio; diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll index a5f6be558e8cf..1d596f5db3aeb 100644 --- a/llvm/test/CodeGen/X86/vector-trunc.ll +++ b/llvm/test/CodeGen/X86/vector-trunc.ll @@ -456,7 +456,7 @@ define <8 x i16> @trunc8i32_8i16_lshr(<8 x i32> %a) { ; ; SSSE3-LABEL: trunc8i32_8i16_lshr: ; SSSE3: # %bb.0: # %entry -; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2,3,6,7,10,11,14,15,10,11,14,15,14,15,255,255] +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [2,3,6,7,10,11,14,15,10,11,14,15,14,15,128,128] ; SSSE3-NEXT: pshufb %xmm2, %xmm1 ; SSSE3-NEXT: pshufb %xmm2, %xmm0 ; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] From 7a1bcf9f9a95fca9dcf8e42f8eb845db3643fffb Mon Sep 17 00:00:00 2001 From: mydeveloperday Date: Sun, 12 Jul 2020 18:57:14 +0100 Subject: [PATCH 049/771] [polly] NFC clang-format change following D83564 --- polly/lib/Analysis/ScopDetection.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index abe189f3e890a..53d0b705c055d 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -383,7 +383,7 @@ ScopDetection::ScopDetection(Function &F, const DominatorTree &DT, template inline bool ScopDetection::invalid(DetectionContext &Context, bool Assert, - Args &&... Arguments) const { + Args &&...Arguments) const { if (!Context.Verifying) { RejectLog &Log = Context.Log; std::shared_ptr RejectReason = std::make_shared(Arguments...); From f4d29d6e8c43cfd924d9d7cc1ac0c269b2788e75 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Thu, 9 Jul 2020 13:30:50 +0100 Subject: [PATCH 050/771] [Matrix] Tighten LangRef definitions and Verifier checks. This tightens the matrix intrinsic definitions in LLVM LangRef and adds corresponding checks to the IR Verifier.
Differential Revision: https://reviews.llvm.org/D83477 --- llvm/docs/LangRef.rst | 96 ++++++++++-------- llvm/lib/IR/Verifier.cpp | 59 ++++++++++-- llvm/test/Verifier/matrix-intrinsics.ll | 123 +++++++++++++++++++++--- 3 files changed, 217 insertions(+), 61 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 86d315be74bcf..02c92f1a4daa6 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15524,6 +15524,7 @@ The argument to this intrinsic must be a vector of floating-point values. Syntax: """"""" +This is an overloaded intrinsic. :: @@ -15548,17 +15549,20 @@ Matrix Intrinsics ----------------- Operations on matrixes requiring shape information (like number of rows/columns -or the memory layout) can be expressed using the matrix intrinsics. Matrixes are -embedded in a flat vector and the intrinsics take the dimensions as arguments. -Currently column-major layout is assumed. The intrinsics support both integer -and floating point matrixes. +or the memory layout) can be expressed using the matrix intrinsics. These +intrinsics require matrix dimensions to be passed as immediate arguments, and +matrixes are passed and returned as vectors. This means that for a ``R`` x +``C`` matrix, element ``i`` of column ``j`` is at index ``j * R + i`` in the +corresponding vector, with indices starting at 0. Currently column-major layout +is assumed. The intrinsics support both integer and floating point matrixes. '``llvm.matrix.transpose.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" +This is an overloaded intrinsic. :: @@ -15567,21 +15571,24 @@ Syntax: Overview: """"""""" -The '``llvm.matrix.transpose.*``' intrinsic treats %In as containing a matrix -with rows and columns and returns the transposed matrix embedded in -the result vector. +The '``llvm.matrix.transpose.*``' intrinsics treat %In as a x matrix +and return the transposed matrix in the result vector. 
Arguments: """""""""" -The and arguments must be constant integers. The vector argument -%In and the returned vector must have * elements. +First argument %In is vector that corresponds to a x matrix. +Thus, arguments and correspond to the number of rows and columns, +respectively, and must be positive, constant integers. The returned vector must +have * elements, and have the same float or integer element type +as %In. '``llvm.matrix.multiply.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" +This is an overloaded intrinsic. :: @@ -15590,18 +15597,19 @@ Syntax: Overview: """"""""" -The '``llvm.matrix.multiply.*``' intrinsic treats %A as a matrix with -rows and columns, %B as a matrix with rows and -columns and multiplies them. The result matrix is returned embedded in the -result vector. +The '``llvm.matrix.multiply.*``' intrinsics treat %A as a x +matrix, %B as a x matrix, and multiplies them. The result +matrix is returned in the result vector. Arguments: """""""""" -The , and arguments must be constant -integers. The vector argument %A must have * elements, %B -must have * elements and the returned vector must have - * elements. +The first vector argument %A corresponds to a matrix with * +elements, and the second argument %B to a matrix with * +elements. Arguments , and must be positive, +constant integers. The returned vector must have * +elements. Vectors %A, %B, and the returned vector all have the same float or +integer element type. '``llvm.matrix.column.major.load.*``' Intrinsic @@ -15609,6 +15617,7 @@ must have * elements and the returned vector must have Syntax: """"""" +This is an overloaded intrinsic. :: @@ -15618,22 +15627,26 @@ Syntax: Overview: """"""""" -The '``llvm.matrix.column.major.load.*``' intrinsic loads a matrix with -rows and columns, using a stride of %Stride between columns. 
For two -consecutive columns A and B, %Stride refers to the distance (the number of -elements) between the start of column A and the start of column B. The result -matrix is returned embedded in the result vector. This allows for convenient -loading of sub matrixes. If is true, the intrinsic is considered -a :ref:`volatile memory access `. - -If the %Ptr argument is known to be aligned to some boundary, this can be -specified as an attribute on the argument. +The '``llvm.matrix.column.major.load.*``' intrinsics load a x +matrix using a stride of %Stride to compute the start address of the different +columns. This allows for convenient loading of sub matrixes. If +is true, the intrinsic is considered a :ref:`volatile memory access +`. The result matrix is returned in the result vector. If the %Ptr +argument is known to be aligned to some boundary, this can be specified as an +attribute on the argument. Arguments: """""""""" -The , and arguments must be constant integers. The -returned vector must have * elements. %Stride must be >= . +The first argument %Ptr is a pointer type to the returned vector type, and +correponds to the start address to load from. The second argument %Stride is a +postive, constant integer with %Stride ``>=`` . %Stride is used to compute +the column memory addresses. I.e., for a column ``C``, its start memory +addresses is calculated with %Ptr + ``C`` * %Stride. The third Argument + is a boolean value. The fourth and fifth arguments, and +, correspond to the number of rows and columns, respectively, and must be +positive, constant integers. The returned vector must have * +elements. The :ref:`align ` parameter attribute can be provided for the %Ptr arguments. @@ -15653,12 +15666,10 @@ Syntax: Overview: """"""""" -The '``llvm.matrix.column.major.store.*``' intrinsic stores the matrix with - rows and columns embedded in %In, using a stride of %Stride -between columns. 
For two consecutive columns A and B, %Stride refers to the -distance (the number of elements) between the start of column A and the start -of column B. If is true, the intrinsic is considered a -:ref:`volatile memory access `. +The '``llvm.matrix.column.major.store.*``' intrinsics store the x +matrix in %In to memory using a stride of %Stride between columns. If + is true, the intrinsic is considered a :ref:`volatile memory +access `. If the %Ptr argument is known to be aligned to some boundary, this can be specified as an attribute on the argument. @@ -15666,8 +15677,15 @@ specified as an attribute on the argument. Arguments: """""""""" -The , , arguments must be constant integers. The -vector argument %In must have * elements. %Stride must be >= . +The first argument %In is a vector that corresponds to a x matrix +to be stored to memory. The second argument %Ptr is a pointer to the vector +type of %In, and is the start address of the matrix in memory. The third +argument %Stride is a positive, constant integer with %Stride ``>=`` . +%Stride is used to compute the column memory addresses. I.e., for a column +``C``, its start memory addresses is calculated with %Ptr + ``C`` * %Stride. +The fourth argument is a boolean value. The arguments and + correspond to the number of rows and columns, respectively, and must be +positive, constant integers. The :ref:`align ` parameter attribute can be provided for the %Ptr arguments. 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 8fa87b7489013..994082fbdb7c1 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5006,36 +5006,77 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::matrix_transpose: case Intrinsic::matrix_column_major_load: case Intrinsic::matrix_column_major_store: { + Function *IF = Call.getCalledFunction(); + ConstantInt *Stride = nullptr; ConstantInt *NumRows; ConstantInt *NumColumns; - VectorType *TypeToCheck; + VectorType *ResultTy; + Type *Op0ElemTy = nullptr; + Type *Op1ElemTy = nullptr; switch (ID) { case Intrinsic::matrix_multiply: NumRows = cast(Call.getArgOperand(2)); NumColumns = cast(Call.getArgOperand(4)); - TypeToCheck = cast(Call.getType()); + ResultTy = cast(Call.getType()); + Op0ElemTy = + cast(Call.getArgOperand(0)->getType())->getElementType(); + Op1ElemTy = + cast(Call.getArgOperand(1)->getType())->getElementType(); break; case Intrinsic::matrix_transpose: NumRows = cast(Call.getArgOperand(1)); NumColumns = cast(Call.getArgOperand(2)); - TypeToCheck = cast(Call.getType()); + ResultTy = cast(Call.getType()); + Op0ElemTy = + cast(Call.getArgOperand(0)->getType())->getElementType(); break; - case Intrinsic::matrix_column_major_load: + case Intrinsic::matrix_column_major_load: { + Stride = dyn_cast(Call.getArgOperand(1)); NumRows = cast(Call.getArgOperand(3)); NumColumns = cast(Call.getArgOperand(4)); - TypeToCheck = cast(Call.getType()); + ResultTy = cast(Call.getType()); + auto *VecTy = cast( + cast(Call.getArgOperand(0)->getType())->getElementType()); + Op0ElemTy = VecTy->getElementType(); + } break; - case Intrinsic::matrix_column_major_store: + case Intrinsic::matrix_column_major_store: { + Stride = dyn_cast(Call.getArgOperand(2)); NumRows = cast(Call.getArgOperand(4)); NumColumns = cast(Call.getArgOperand(5)); - TypeToCheck = cast(Call.getArgOperand(0)->getType()); + ResultTy = cast(Call.getArgOperand(0)->getType()); + Op0ElemTy 
= + cast(Call.getArgOperand(0)->getType())->getElementType(); + auto *VecTy = cast( + cast(Call.getArgOperand(1)->getType())->getElementType()); + Op1ElemTy = VecTy->getElementType(); + } break; default: llvm_unreachable("unexpected intrinsic"); } - Assert(TypeToCheck->getNumElements() == + + Assert(ResultTy->getElementType()->isIntegerTy() || + ResultTy->getElementType()->isFloatingPointTy(), + "Result type must be an integer or floating-point type!", IF); + + Assert(ResultTy->getElementType() == Op0ElemTy, + "Vector element type mismatch of the result and first operand " + "vector!", IF); + + if (Op1ElemTy) + Assert(ResultTy->getElementType() == Op1ElemTy, + "Vector element type mismatch of the result and second operand " + "vector!", IF); + + Assert(ResultTy->getNumElements() == NumRows->getZExtValue() * NumColumns->getZExtValue(), - "result of a matrix operation does not fit in the returned vector"); + "Result of a matrix operation does not fit in the returned vector!"); + + if (Stride) + Assert(Stride->getZExtValue() >= NumRows->getZExtValue(), + "Stride must be greater or equal than the number of rows!", IF); + break; } }; diff --git a/llvm/test/Verifier/matrix-intrinsics.ll b/llvm/test/Verifier/matrix-intrinsics.ll index 6b2a4c501c660..5afab26a48c53 100644 --- a/llvm/test/Verifier/matrix-intrinsics.ll +++ b/llvm/test/Verifier/matrix-intrinsics.ll @@ -3,9 +3,9 @@ declare <4 x float> @llvm.matrix.transpose.v4f32(<4 x float>, i32, i32) define <4 x float> @transpose(<4 x float> %m, i32 %arg) { ; CHECK: assembly parsed, but does not verify as correct! -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! +; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! 
+; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! ; CHECK-NEXT: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %arg ; CHECK-NEXT: %result.3 = call <4 x float> @llvm.matrix.transpose.v4f32(<4 x float> %result.2, i32 %arg, i32 2) @@ -22,9 +22,9 @@ define <4 x float> @transpose(<4 x float> %m, i32 %arg) { declare <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float>, <4 x float>, i32, i32, i32) define <4 x float> @multiply(<4 x float> %m, i32 %arg) { -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! +; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! +; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! ; CHECK-NEXT: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %arg ; CHECK-NEXT: %result.3 = call <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float> %result.2, <4 x float> %m, i32 %arg, i32 2, i32 1) @@ -38,9 +38,9 @@ define <4 x float> @multiply(<4 x float> %m, i32 %arg) { declare <4 x float> @llvm.matrix.column.major.load.v4f32.p0v4f32(<4 x float>*, i64, i1, i32, i32) declare <6 x float> @llvm.matrix.column.major.load.v6f32.p0v6f32(<6 x float>*, i64, i1, i32, i32) define <4 x float> @column.major_load(<4 x float>* %m, <6 x float>* %n, i32 %arg) { -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! +; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! 
+; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! ; CHECK-NEXT: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %arg ; CHECK-NEXT: %result.3 = call <6 x float> @llvm.matrix.column.major.load.v6f32.p0v6f32(<6 x float>* %n, i64 2, i1 true, i32 3, i32 %arg) @@ -54,13 +54,110 @@ define <4 x float> @column.major_load(<4 x float>* %m, <6 x float>* %n, i32 %arg declare void @llvm.matrix.column.major.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i64, i1, i32, i32) declare void @llvm.matrix.column.major.store.v6f32.p0v6f32(<6 x float>, <6 x float>*, i64, i1, i32, i32) define void @column.major_store(<4 x float>* %m, <6 x float>* %n, i64 %arg) { -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector -; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! +; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! +; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! +; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! 
call void @llvm.matrix.column.major.store.v4f32.p0v4f32(<4 x float> zeroinitializer, <4 x float>* %m, i64 0, i1 false, i32 0, i32 0) call void @llvm.matrix.column.major.store.v4f32.p0v4f32(<4 x float> zeroinitializer, <4 x float>* %m, i64 2, i1 false, i32 1, i32 2) call void @llvm.matrix.column.major.store.v6f32.p0v6f32(<6 x float> zeroinitializer, <6 x float>* %n, i64 2, i1 false, i32 3, i32 3) call void @llvm.matrix.column.major.store.v6f32.p0v6f32(<6 x float> zeroinitializer, <6 x float>* %n, i64 %arg, i1 false, i32 3, i32 3) ret void } + +declare <4 x float> @llvm.matrix.transpose.v4f32.v4i32(<4 x i32>, i32, i32) +declare <4 x i32> @llvm.matrix.transpose.v4i32.v4f32(<4 x float>, i32, i32) + +define <4 x float> @transpose_mixed_types(<4 x float> %fvec, <4 x i32> %ivec, i32 %arg) { +; +; CHECK-NEXT: Intrinsic has incorrect argument type! +; CHECK-NEXT: <4 x float> (<4 x i32>, i32, i32)* @llvm.matrix.transpose.v4f32.v4i32 +; CHECK-NEXT: Intrinsic has incorrect argument type! +; CHECK-NEXT: <4 x i32> (<4 x float>, i32, i32)* @llvm.matrix.transpose.v4i32.v4f32 +; + %result.0 = call <4 x float> @llvm.matrix.transpose.v4f32.v4i32(<4 x i32> %ivec, i32 0, i32 0) + %result.1 = call <4 x i32> @llvm.matrix.transpose.v4i32.v4f32(<4 x float> %result.0, i32 3, i32 2) + ret <4 x float> %result.0 +} + +declare <4 x i32> @llvm.matrix.multiply.v4i32.v4f32.v4f32(<4 x float>, <4 x float>, i32, i32, i32) +declare <4 x float> @llvm.matrix.multiply.v4f32.v4i32.v4f32(<4 x i32>, <4 x float>, i32, i32, i32) +declare <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, i32, i32, i32) +declare <4 x float> @llvm.matrix.multiply.v4f32.v4i32.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) + +define <4 x float> @multiply_mixed_types(<4 x i32> %ivec, <4 x float> %fvec, i32 %arg) { +; +; CHECK-NEXT: Vector element type mismatch of the result and first operand vector! 
+; CHECK-NEXT: <4 x i32> (<4 x float>, <4 x float>, i32, i32, i32)* @llvm.matrix.multiply.v4i32.v4f32.v4f32 +; CHECK-NEXT: Vector element type mismatch of the result and first operand vector! +; CHECK-NEXT: <4 x float> (<4 x i32>, <4 x float>, i32, i32, i32)* @llvm.matrix.multiply.v4f32.v4i32.v4f32 +; CHECK-NEXT: Vector element type mismatch of the result and second operand vector! +; CHECK-NEXT: <4 x float> (<4 x float>, <4 x i32>, i32, i32, i32)* @llvm.matrix.multiply.v4f32.v4f32.v4i32 +; CHECK-NEXT: Vector element type mismatch of the result and first operand vector! +; CHECK-NEXT: <4 x float> (<4 x i32>, <4 x i32>, i32, i32, i32)* @llvm.matrix.multiply.v4f32.v4i32.v4i32 +; + %result.0 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4f32.v4f32(<4 x float> %fvec, <4 x float> %fvec, i32 2, i32 2, i32 2) + %result.1 = call <4 x float> @llvm.matrix.multiply.v4f32.v4i32.v4f32(<4 x i32> %result.0, <4 x float> %fvec, i32 2, i32 2, i32 2) + %result.2 = call <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4i32(<4 x float> %fvec, <4 x i32> %ivec, i32 2, i32 2, i32 2) + %result.3 = call <4 x float> @llvm.matrix.multiply.v4f32.v4i32.v4i32(<4 x i32> %ivec, <4 x i32> %ivec, i32 2, i32 2, i32 2) + ret <4 x float> %result.3 +} + +declare <4 x float> @llvm.matrix.column.major.load.v4f32.p0v4i32(<4 x i32>*, i64, i1, i32, i32) +declare <4 x i32> @llvm.matrix.column.major.load.v4i32.p0v4f32(<4 x float>*, i64, i1, i32, i32) + +define <4 x float> @column.major_load_mixed_types(<4 x i32>* %m, <4 x float>* %n, i32 %arg) { +; +; CHECK-NEXT: Vector element type mismatch of the result and first operand vector! +; CHECK-NEXT: <4 x float> (<4 x i32>*, i64, i1, i32, i32)* @llvm.matrix.column.major.load.v4f32.p0v4i32 +; CHECK-NEXT: Vector element type mismatch of the result and first operand vector! 
+; CHECK-NEXT: <4 x i32> (<4 x float>*, i64, i1, i32, i32)* @llvm.matrix.column.major.load.v4i32.p0v4f32 +; + %result.0 = call <4 x float> @llvm.matrix.column.major.load.v4f32.p0v4i32(<4 x i32>* %m, i64 2, i1 false, i32 2, i32 2) + %result.1 = call <4 x i32> @llvm.matrix.column.major.load.v4i32.p0v4f32(<4 x float>* %n, i64 2, i1 false, i32 2, i32 2) + ret <4 x float> %result.0 +} + +declare void @llvm.matrix.column.major.store.v4i32.p0v4f32(<4 x i32>, <4 x float>*, i64, i1, i32, i32) +declare void @llvm.matrix.column.major.store.v4f32.p0v4i32(<4 x float>, <4 x i32>*, i64, i1, i32, i32) + +define void @column.major_store_mixed_types(<4 x float>* %m, <4 x i32>* %n, i64 %arg) { +; +; CHECK-NEXT: Vector element type mismatch of the result and second operand vector! +; CHECK-NEXT: void (<4 x i32>, <4 x float>*, i64, i1, i32, i32)* @llvm.matrix.column.major.store.v4i32.p0v4f32 +; CHECK-NEXT: Vector element type mismatch of the result and second operand vector! +; CHECK-NEXT: void (<4 x float>, <4 x i32>*, i64, i1, i32, i32)* @llvm.matrix.column.major.store.v4f32.p0v4i32 +; + call void @llvm.matrix.column.major.store.v4i32.p0v4f32(<4 x i32> zeroinitializer, <4 x float>* %m, i64 2, i1 false, i32 2, i32 2) + call void @llvm.matrix.column.major.store.v4f32.p0v4i32(<4 x float> zeroinitializer, <4 x i32>* %n, i64 2, i1 false, i32 2, i32 2) + ret void +} + +declare void @llvm.matrix.column.major.store.v4f32p0.p0v4f32(<4 x float*>, <4 x float>*, i64, i1, i32, i32) + +define void @column.major_store_non_int_float_type(<4 x float>* %m, <4 x float>* %n, i64 %arg) { +; +; CHECK-NEXT: Result type must be an integer or floating-point type! 
+; CHECK-NEXT: void (<4 x float*>, <4 x float>*, i64, i1, i32, i32)* @llvm.matrix.column.major.store.v4p0f32.p0v4f32 +; + call void @llvm.matrix.column.major.store.v4f32p0.p0v4f32(<4 x float*> zeroinitializer, <4 x float>* %n, i64 2, i1 false, i32 2, i32 2) + ret void +} + +define <4 x float> @column.major_load_stride_too_small(<4 x float>* %m, i32 %arg) { +; +; CHECK-NEXT: Stride must be greater or equal than the number of rows! +; CHECK-NEXT: <4 x float> (<4 x float>*, i64, i1, i32, i32)* @llvm.matrix.column.major.load.v4f32.p0v4f32 +; + %result.1 = call <4 x float> @llvm.matrix.column.major.load.v4f32.p0v4f32(<4 x float>* %m, i64 1, i1 false, i32 2, i32 2) + ret <4 x float> %result.1 +} + +define void @column.major_store_stride_too_small(<4 x float>* %m, i64 %arg) { +; +; CHECK-NEXT: Stride must be greater or equal than the number of rows! +; CHECK-NEXT: void (<4 x float>, <4 x float>*, i64, i1, i32, i32)* @llvm.matrix.column.major.store.v4f32.p0v4f32 +; + call void @llvm.matrix.column.major.store.v4f32.p0v4f32(<4 x float> zeroinitializer, <4 x float>* %m, i64 1, i1 false, i32 2, i32 2) + ret void +} From 4ff7ed33108d9039fd960a4979b2e1503888582c Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Sun, 12 Jul 2020 19:19:25 +0100 Subject: [PATCH 051/771] Revert "[Matrix] Tighten LangRef definitions and Verifier checks." This reverts commit f4d29d6e8c43cfd924d9d7cc1ac0c269b2788e75. Hm, some build bot failures, reverting it while I investigate that. --- llvm/docs/LangRef.rst | 96 ++++++++---------- llvm/lib/IR/Verifier.cpp | 59 ++---------- llvm/test/Verifier/matrix-intrinsics.ll | 123 +++--------------------- 3 files changed, 61 insertions(+), 217 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 02c92f1a4daa6..86d315be74bcf 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -15524,7 +15524,6 @@ The argument to this intrinsic must be a vector of floating-point values. Syntax: """"""" -This is an overloaded intrinsic. 
:: @@ -15549,20 +15548,17 @@ Matrix Intrinsics ----------------- Operations on matrixes requiring shape information (like number of rows/columns -or the memory layout) can be expressed using the matrix intrinsics. These -intrinsics require matrix dimensions to be passed as immediate arguments, and -matrixes are passed and returned as vectors. This means that for a ``R`` x -``C`` matrix, element ``i`` of column ``j`` is at index ``j * R + i`` in the -corresponding vector, with indices starting at 0. Currently column-major layout -is assumed. The intrinsics support both integer and floating point matrixes. +or the memory layout) can be expressed using the matrix intrinsics. Matrixes are +embedded in a flat vector and the intrinsics take the dimensions as arguments. +Currently column-major layout is assumed. The intrinsics support both integer +and floating point matrixes. '``llvm.matrix.transpose.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" -This is an overloaded intrinsic. :: @@ -15571,24 +15567,21 @@ This is an overloaded intrinsic. Overview: """"""""" -The '``llvm.matrix.transpose.*``' intrinsics treat %In as a x matrix -and return the transposed matrix in the result vector. +The '``llvm.matrix.transpose.*``' intrinsic treats %In as containing a matrix +with rows and columns and returns the transposed matrix embedded in +the result vector. Arguments: """""""""" -First argument %In is vector that corresponds to a x matrix. -Thus, arguments and correspond to the number of rows and columns, -respectively, and must be positive, constant integers. The returned vector must -have * elements, and have the same float or integer element type -as %In. +The and arguments must be constant integers. The vector argument +%In and the returned vector must have * elements. 
'``llvm.matrix.multiply.*``' Intrinsic -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Syntax: """"""" -This is an overloaded intrinsic. :: @@ -15597,19 +15590,18 @@ This is an overloaded intrinsic. Overview: """"""""" -The '``llvm.matrix.multiply.*``' intrinsics treat %A as a x -matrix, %B as a x matrix, and multiplies them. The result -matrix is returned in the result vector. +The '``llvm.matrix.multiply.*``' intrinsic treats %A as a matrix with +rows and columns, %B as a matrix with rows and +columns and multiplies them. The result matrix is returned embedded in the +result vector. Arguments: """""""""" -The first vector argument %A corresponds to a matrix with * -elements, and the second argument %B to a matrix with * -elements. Arguments , and must be positive, -constant integers. The returned vector must have * -elements. Vectors %A, %B, and the returned vector all have the same float or -integer element type. +The , and arguments must be constant +integers. The vector argument %A must have * elements, %B +must have * elements and the returned vector must have + * elements. '``llvm.matrix.column.major.load.*``' Intrinsic @@ -15617,7 +15609,6 @@ integer element type. Syntax: """"""" -This is an overloaded intrinsic. :: @@ -15627,26 +15618,22 @@ This is an overloaded intrinsic. Overview: """"""""" -The '``llvm.matrix.column.major.load.*``' intrinsics load a x -matrix using a stride of %Stride to compute the start address of the different -columns. This allows for convenient loading of sub matrixes. If -is true, the intrinsic is considered a :ref:`volatile memory access -`. The result matrix is returned in the result vector. If the %Ptr -argument is known to be aligned to some boundary, this can be specified as an -attribute on the argument. +The '``llvm.matrix.column.major.load.*``' intrinsic loads a matrix with +rows and columns, using a stride of %Stride between columns. 
For two +consecutive columns A and B, %Stride refers to the distance (the number of +elements) between the start of column A and the start of column B. The result +matrix is returned embedded in the result vector. This allows for convenient +loading of sub matrixes. If is true, the intrinsic is considered +a :ref:`volatile memory access `. + +If the %Ptr argument is known to be aligned to some boundary, this can be +specified as an attribute on the argument. Arguments: """""""""" -The first argument %Ptr is a pointer type to the returned vector type, and -correponds to the start address to load from. The second argument %Stride is a -postive, constant integer with %Stride ``>=`` . %Stride is used to compute -the column memory addresses. I.e., for a column ``C``, its start memory -addresses is calculated with %Ptr + ``C`` * %Stride. The third Argument - is a boolean value. The fourth and fifth arguments, and -, correspond to the number of rows and columns, respectively, and must be -positive, constant integers. The returned vector must have * -elements. +The , and arguments must be constant integers. The +returned vector must have * elements. %Stride must be >= . The :ref:`align ` parameter attribute can be provided for the %Ptr arguments. @@ -15666,10 +15653,12 @@ Syntax: Overview: """"""""" -The '``llvm.matrix.column.major.store.*``' intrinsics store the x -matrix in %In to memory using a stride of %Stride between columns. If - is true, the intrinsic is considered a :ref:`volatile memory -access `. +The '``llvm.matrix.column.major.store.*``' intrinsic stores the matrix with + rows and columns embedded in %In, using a stride of %Stride +between columns. For two consecutive columns A and B, %Stride refers to the +distance (the number of elements) between the start of column A and the start +of column B. If is true, the intrinsic is considered a +:ref:`volatile memory access `. 
If the %Ptr argument is known to be aligned to some boundary, this can be specified as an attribute on the argument. @@ -15677,15 +15666,8 @@ specified as an attribute on the argument. Arguments: """""""""" -The first argument %In is a vector that corresponds to a x matrix -to be stored to memory. The second argument %Ptr is a pointer to the vector -type of %In, and is the start address of the matrix in memory. The third -argument %Stride is a positive, constant integer with %Stride ``>=`` . -%Stride is used to compute the column memory addresses. I.e., for a column -``C``, its start memory addresses is calculated with %Ptr + ``C`` * %Stride. -The fourth argument is a boolean value. The arguments and - correspond to the number of rows and columns, respectively, and must be -positive, constant integers. +The , , arguments must be constant integers. The +vector argument %In must have * elements. %Stride must be >= . The :ref:`align ` parameter attribute can be provided for the %Ptr arguments. 
diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 994082fbdb7c1..8fa87b7489013 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -5006,77 +5006,36 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { case Intrinsic::matrix_transpose: case Intrinsic::matrix_column_major_load: case Intrinsic::matrix_column_major_store: { - Function *IF = Call.getCalledFunction(); - ConstantInt *Stride = nullptr; ConstantInt *NumRows; ConstantInt *NumColumns; - VectorType *ResultTy; - Type *Op0ElemTy = nullptr; - Type *Op1ElemTy = nullptr; + VectorType *TypeToCheck; switch (ID) { case Intrinsic::matrix_multiply: NumRows = cast(Call.getArgOperand(2)); NumColumns = cast(Call.getArgOperand(4)); - ResultTy = cast(Call.getType()); - Op0ElemTy = - cast(Call.getArgOperand(0)->getType())->getElementType(); - Op1ElemTy = - cast(Call.getArgOperand(1)->getType())->getElementType(); + TypeToCheck = cast(Call.getType()); break; case Intrinsic::matrix_transpose: NumRows = cast(Call.getArgOperand(1)); NumColumns = cast(Call.getArgOperand(2)); - ResultTy = cast(Call.getType()); - Op0ElemTy = - cast(Call.getArgOperand(0)->getType())->getElementType(); + TypeToCheck = cast(Call.getType()); break; - case Intrinsic::matrix_column_major_load: { - Stride = dyn_cast(Call.getArgOperand(1)); + case Intrinsic::matrix_column_major_load: NumRows = cast(Call.getArgOperand(3)); NumColumns = cast(Call.getArgOperand(4)); - ResultTy = cast(Call.getType()); - auto *VecTy = cast( - cast(Call.getArgOperand(0)->getType())->getElementType()); - Op0ElemTy = VecTy->getElementType(); - } + TypeToCheck = cast(Call.getType()); break; - case Intrinsic::matrix_column_major_store: { - Stride = dyn_cast(Call.getArgOperand(2)); + case Intrinsic::matrix_column_major_store: NumRows = cast(Call.getArgOperand(4)); NumColumns = cast(Call.getArgOperand(5)); - ResultTy = cast(Call.getArgOperand(0)->getType()); - Op0ElemTy = - 
cast(Call.getArgOperand(0)->getType())->getElementType(); - auto *VecTy = cast( - cast(Call.getArgOperand(1)->getType())->getElementType()); - Op1ElemTy = VecTy->getElementType(); - } + TypeToCheck = cast(Call.getArgOperand(0)->getType()); break; default: llvm_unreachable("unexpected intrinsic"); } - - Assert(ResultTy->getElementType()->isIntegerTy() || - ResultTy->getElementType()->isFloatingPointTy(), - "Result type must be an integer or floating-point type!", IF); - - Assert(ResultTy->getElementType() == Op0ElemTy, - "Vector element type mismatch of the result and first operand " - "vector!", IF); - - if (Op1ElemTy) - Assert(ResultTy->getElementType() == Op1ElemTy, - "Vector element type mismatch of the result and second operand " - "vector!", IF); - - Assert(ResultTy->getNumElements() == + Assert(TypeToCheck->getNumElements() == NumRows->getZExtValue() * NumColumns->getZExtValue(), - "Result of a matrix operation does not fit in the returned vector!"); - - if (Stride) - Assert(Stride->getZExtValue() >= NumRows->getZExtValue(), - "Stride must be greater or equal than the number of rows!", IF); - + "result of a matrix operation does not fit in the returned vector"); break; } }; diff --git a/llvm/test/Verifier/matrix-intrinsics.ll b/llvm/test/Verifier/matrix-intrinsics.ll index 5afab26a48c53..6b2a4c501c660 100644 --- a/llvm/test/Verifier/matrix-intrinsics.ll +++ b/llvm/test/Verifier/matrix-intrinsics.ll @@ -3,9 +3,9 @@ declare <4 x float> @llvm.matrix.transpose.v4f32(<4 x float>, i32, i32) define <4 x float> @transpose(<4 x float> %m, i32 %arg) { ; CHECK: assembly parsed, but does not verify as correct! -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! 
+; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector ; CHECK-NEXT: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %arg ; CHECK-NEXT: %result.3 = call <4 x float> @llvm.matrix.transpose.v4f32(<4 x float> %result.2, i32 %arg, i32 2) @@ -22,9 +22,9 @@ define <4 x float> @transpose(<4 x float> %m, i32 %arg) { declare <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float>, <4 x float>, i32, i32, i32) define <4 x float> @multiply(<4 x float> %m, i32 %arg) { -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector ; CHECK-NEXT: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %arg ; CHECK-NEXT: %result.3 = call <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4f32(<4 x float> %result.2, <4 x float> %m, i32 %arg, i32 2, i32 1) @@ -38,9 +38,9 @@ define <4 x float> @multiply(<4 x float> %m, i32 %arg) { declare <4 x float> @llvm.matrix.column.major.load.v4f32.p0v4f32(<4 x float>*, i64, i1, i32, i32) declare <6 x float> @llvm.matrix.column.major.load.v6f32.p0v6f32(<6 x float>*, i64, i1, i32, i32) define <4 x float> @column.major_load(<4 x float>* %m, <6 x float>* %n, i32 %arg) { -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! 
+; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector ; CHECK-NEXT: immarg operand has non-immediate parameter ; CHECK-NEXT: i32 %arg ; CHECK-NEXT: %result.3 = call <6 x float> @llvm.matrix.column.major.load.v6f32.p0v6f32(<6 x float>* %n, i64 2, i1 true, i32 3, i32 %arg) @@ -54,110 +54,13 @@ define <4 x float> @column.major_load(<4 x float>* %m, <6 x float>* %n, i32 %arg declare void @llvm.matrix.column.major.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i64, i1, i32, i32) declare void @llvm.matrix.column.major.store.v6f32.p0v6f32(<6 x float>, <6 x float>*, i64, i1, i32, i32) define void @column.major_store(<4 x float>* %m, <6 x float>* %n, i64 %arg) { -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! -; CHECK-NEXT: Result of a matrix operation does not fit in the returned vector! 
+; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector +; CHECK-NEXT: result of a matrix operation does not fit in the returned vector call void @llvm.matrix.column.major.store.v4f32.p0v4f32(<4 x float> zeroinitializer, <4 x float>* %m, i64 0, i1 false, i32 0, i32 0) call void @llvm.matrix.column.major.store.v4f32.p0v4f32(<4 x float> zeroinitializer, <4 x float>* %m, i64 2, i1 false, i32 1, i32 2) call void @llvm.matrix.column.major.store.v6f32.p0v6f32(<6 x float> zeroinitializer, <6 x float>* %n, i64 2, i1 false, i32 3, i32 3) call void @llvm.matrix.column.major.store.v6f32.p0v6f32(<6 x float> zeroinitializer, <6 x float>* %n, i64 %arg, i1 false, i32 3, i32 3) ret void } - -declare <4 x float> @llvm.matrix.transpose.v4f32.v4i32(<4 x i32>, i32, i32) -declare <4 x i32> @llvm.matrix.transpose.v4i32.v4f32(<4 x float>, i32, i32) - -define <4 x float> @transpose_mixed_types(<4 x float> %fvec, <4 x i32> %ivec, i32 %arg) { -; -; CHECK-NEXT: Intrinsic has incorrect argument type! -; CHECK-NEXT: <4 x float> (<4 x i32>, i32, i32)* @llvm.matrix.transpose.v4f32.v4i32 -; CHECK-NEXT: Intrinsic has incorrect argument type! 
-; CHECK-NEXT: <4 x i32> (<4 x float>, i32, i32)* @llvm.matrix.transpose.v4i32.v4f32 -; - %result.0 = call <4 x float> @llvm.matrix.transpose.v4f32.v4i32(<4 x i32> %ivec, i32 0, i32 0) - %result.1 = call <4 x i32> @llvm.matrix.transpose.v4i32.v4f32(<4 x float> %result.0, i32 3, i32 2) - ret <4 x float> %result.0 -} - -declare <4 x i32> @llvm.matrix.multiply.v4i32.v4f32.v4f32(<4 x float>, <4 x float>, i32, i32, i32) -declare <4 x float> @llvm.matrix.multiply.v4f32.v4i32.v4f32(<4 x i32>, <4 x float>, i32, i32, i32) -declare <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4i32(<4 x float>, <4 x i32>, i32, i32, i32) -declare <4 x float> @llvm.matrix.multiply.v4f32.v4i32.v4i32(<4 x i32>, <4 x i32>, i32, i32, i32) - -define <4 x float> @multiply_mixed_types(<4 x i32> %ivec, <4 x float> %fvec, i32 %arg) { -; -; CHECK-NEXT: Vector element type mismatch of the result and first operand vector! -; CHECK-NEXT: <4 x i32> (<4 x float>, <4 x float>, i32, i32, i32)* @llvm.matrix.multiply.v4i32.v4f32.v4f32 -; CHECK-NEXT: Vector element type mismatch of the result and first operand vector! -; CHECK-NEXT: <4 x float> (<4 x i32>, <4 x float>, i32, i32, i32)* @llvm.matrix.multiply.v4f32.v4i32.v4f32 -; CHECK-NEXT: Vector element type mismatch of the result and second operand vector! -; CHECK-NEXT: <4 x float> (<4 x float>, <4 x i32>, i32, i32, i32)* @llvm.matrix.multiply.v4f32.v4f32.v4i32 -; CHECK-NEXT: Vector element type mismatch of the result and first operand vector! 
-; CHECK-NEXT: <4 x float> (<4 x i32>, <4 x i32>, i32, i32, i32)* @llvm.matrix.multiply.v4f32.v4i32.v4i32 -; - %result.0 = call <4 x i32> @llvm.matrix.multiply.v4i32.v4f32.v4f32(<4 x float> %fvec, <4 x float> %fvec, i32 2, i32 2, i32 2) - %result.1 = call <4 x float> @llvm.matrix.multiply.v4f32.v4i32.v4f32(<4 x i32> %result.0, <4 x float> %fvec, i32 2, i32 2, i32 2) - %result.2 = call <4 x float> @llvm.matrix.multiply.v4f32.v4f32.v4i32(<4 x float> %fvec, <4 x i32> %ivec, i32 2, i32 2, i32 2) - %result.3 = call <4 x float> @llvm.matrix.multiply.v4f32.v4i32.v4i32(<4 x i32> %ivec, <4 x i32> %ivec, i32 2, i32 2, i32 2) - ret <4 x float> %result.3 -} - -declare <4 x float> @llvm.matrix.column.major.load.v4f32.p0v4i32(<4 x i32>*, i64, i1, i32, i32) -declare <4 x i32> @llvm.matrix.column.major.load.v4i32.p0v4f32(<4 x float>*, i64, i1, i32, i32) - -define <4 x float> @column.major_load_mixed_types(<4 x i32>* %m, <4 x float>* %n, i32 %arg) { -; -; CHECK-NEXT: Vector element type mismatch of the result and first operand vector! -; CHECK-NEXT: <4 x float> (<4 x i32>*, i64, i1, i32, i32)* @llvm.matrix.column.major.load.v4f32.p0v4i32 -; CHECK-NEXT: Vector element type mismatch of the result and first operand vector! -; CHECK-NEXT: <4 x i32> (<4 x float>*, i64, i1, i32, i32)* @llvm.matrix.column.major.load.v4i32.p0v4f32 -; - %result.0 = call <4 x float> @llvm.matrix.column.major.load.v4f32.p0v4i32(<4 x i32>* %m, i64 2, i1 false, i32 2, i32 2) - %result.1 = call <4 x i32> @llvm.matrix.column.major.load.v4i32.p0v4f32(<4 x float>* %n, i64 2, i1 false, i32 2, i32 2) - ret <4 x float> %result.0 -} - -declare void @llvm.matrix.column.major.store.v4i32.p0v4f32(<4 x i32>, <4 x float>*, i64, i1, i32, i32) -declare void @llvm.matrix.column.major.store.v4f32.p0v4i32(<4 x float>, <4 x i32>*, i64, i1, i32, i32) - -define void @column.major_store_mixed_types(<4 x float>* %m, <4 x i32>* %n, i64 %arg) { -; -; CHECK-NEXT: Vector element type mismatch of the result and second operand vector! 
-; CHECK-NEXT: void (<4 x i32>, <4 x float>*, i64, i1, i32, i32)* @llvm.matrix.column.major.store.v4i32.p0v4f32 -; CHECK-NEXT: Vector element type mismatch of the result and second operand vector! -; CHECK-NEXT: void (<4 x float>, <4 x i32>*, i64, i1, i32, i32)* @llvm.matrix.column.major.store.v4f32.p0v4i32 -; - call void @llvm.matrix.column.major.store.v4i32.p0v4f32(<4 x i32> zeroinitializer, <4 x float>* %m, i64 2, i1 false, i32 2, i32 2) - call void @llvm.matrix.column.major.store.v4f32.p0v4i32(<4 x float> zeroinitializer, <4 x i32>* %n, i64 2, i1 false, i32 2, i32 2) - ret void -} - -declare void @llvm.matrix.column.major.store.v4f32p0.p0v4f32(<4 x float*>, <4 x float>*, i64, i1, i32, i32) - -define void @column.major_store_non_int_float_type(<4 x float>* %m, <4 x float>* %n, i64 %arg) { -; -; CHECK-NEXT: Result type must be an integer or floating-point type! -; CHECK-NEXT: void (<4 x float*>, <4 x float>*, i64, i1, i32, i32)* @llvm.matrix.column.major.store.v4p0f32.p0v4f32 -; - call void @llvm.matrix.column.major.store.v4f32p0.p0v4f32(<4 x float*> zeroinitializer, <4 x float>* %n, i64 2, i1 false, i32 2, i32 2) - ret void -} - -define <4 x float> @column.major_load_stride_too_small(<4 x float>* %m, i32 %arg) { -; -; CHECK-NEXT: Stride must be greater or equal than the number of rows! -; CHECK-NEXT: <4 x float> (<4 x float>*, i64, i1, i32, i32)* @llvm.matrix.column.major.load.v4f32.p0v4f32 -; - %result.1 = call <4 x float> @llvm.matrix.column.major.load.v4f32.p0v4f32(<4 x float>* %m, i64 1, i1 false, i32 2, i32 2) - ret <4 x float> %result.1 -} - -define void @column.major_store_stride_too_small(<4 x float>* %m, i64 %arg) { -; -; CHECK-NEXT: Stride must be greater or equal than the number of rows! 
-; CHECK-NEXT: void (<4 x float>, <4 x float>*, i64, i1, i32, i32)* @llvm.matrix.column.major.store.v4f32.p0v4f32 -; - call void @llvm.matrix.column.major.store.v4f32.p0v4f32(<4 x float> zeroinitializer, <4 x float>* %m, i64 1, i1 false, i32 2, i32 2) - ret void -} From 445897334741c53e98f8044f5f33ab1e888b3818 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 12 Jul 2020 15:56:26 -0400 Subject: [PATCH 052/771] [InstCombine] fold mul of zext/sext bools to 'and' Similar to rG40fcc42: The base case only worked because we were relying on a poison-unsafe select transform; if that is fixed, we would regress on patterns like this. The extra use tests show that the select transform can't be applied consistently. So it may be a regression to have an extra instruction on 1 test, but that result was not created safely and does not happen reliably. --- .../Transforms/InstCombine/InstCombineMulDivRem.cpp | 10 ++++++++++ llvm/test/Transforms/InstCombine/mul.ll | 11 ++++++----- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index 2965103d40295..c6233a68847dd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -376,6 +376,16 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) { Value *And = Builder.CreateAnd(X, Y, "mulbool"); return CastInst::Create(Instruction::ZExt, And, I.getType()); } + // (sext bool X) * (zext bool Y) --> sext (and X, Y) + // (zext bool X) * (sext bool Y) --> sext (and X, Y) + // Note: -1 * 1 == 1 * -1 == -1 + if (((match(Op0, m_SExt(m_Value(X))) && match(Op1, m_ZExt(m_Value(Y)))) || + (match(Op0, m_ZExt(m_Value(X))) && match(Op1, m_SExt(m_Value(Y))))) && + X->getType()->isIntOrIntVectorTy(1) && X->getType() == Y->getType() && + (Op0->hasOneUse() || Op1->hasOneUse())) { + Value *And = Builder.CreateAnd(X, Y, "mulbool"); + return 
CastInst::Create(Instruction::SExt, And, I.getType()); + } // (bool X) * Y --> X ? Y : 0 // Y * (bool X) --> X ? Y : 0 diff --git a/llvm/test/Transforms/InstCombine/mul.ll b/llvm/test/Transforms/InstCombine/mul.ll index 9d1b8ad457e41..059b18d30b907 100644 --- a/llvm/test/Transforms/InstCombine/mul.ll +++ b/llvm/test/Transforms/InstCombine/mul.ll @@ -247,8 +247,8 @@ define i32 @mul_bools_sext_use3(i1 %x, i1 %y) { define <3 x i32> @mul_bools_mixed_ext(<3 x i1> %x, <3 x i1> %y) { ; CHECK-LABEL: @mul_bools_mixed_ext( -; CHECK-NEXT: [[NARROW:%.*]] = and <3 x i1> [[X:%.*]], [[Y:%.*]] -; CHECK-NEXT: [[R:%.*]] = sext <3 x i1> [[NARROW]] to <3 x i32> +; CHECK-NEXT: [[MULBOOL:%.*]] = and <3 x i1> [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = sext <3 x i1> [[MULBOOL]] to <3 x i32> ; CHECK-NEXT: ret <3 x i32> [[R]] ; %zx = zext <3 x i1> %x to <3 x i32> @@ -261,8 +261,8 @@ define i32 @mul_bools_mixed_ext_use1(i1 %x, i1 %y) { ; CHECK-LABEL: @mul_bools_mixed_ext_use1( ; CHECK-NEXT: [[ZY:%.*]] = zext i1 [[Y:%.*]] to i32 ; CHECK-NEXT: call void @use32(i32 [[ZY]]) -; CHECK-NEXT: [[NARROW:%.*]] = and i1 [[Y]], [[X:%.*]] -; CHECK-NEXT: [[R:%.*]] = sext i1 [[NARROW]] to i32 +; CHECK-NEXT: [[MULBOOL:%.*]] = and i1 [[X:%.*]], [[Y]] +; CHECK-NEXT: [[R:%.*]] = sext i1 [[MULBOOL]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %sx = sext i1 %x to i32 @@ -276,7 +276,8 @@ define i32 @mul_bools_mixed_ext_use2(i1 %x, i1 %y) { ; CHECK-LABEL: @mul_bools_mixed_ext_use2( ; CHECK-NEXT: [[SY:%.*]] = sext i1 [[Y:%.*]] to i32 ; CHECK-NEXT: call void @use32(i32 [[SY]]) -; CHECK-NEXT: [[R:%.*]] = select i1 [[X:%.*]], i32 [[SY]], i32 0 +; CHECK-NEXT: [[MULBOOL:%.*]] = and i1 [[Y]], [[X:%.*]] +; CHECK-NEXT: [[R:%.*]] = sext i1 [[MULBOOL]] to i32 ; CHECK-NEXT: ret i32 [[R]] ; %zx = zext i1 %x to i32 From b92c2bb6a2058611d727c4e2ce3a928f0a3e647d Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 12 Jul 2020 12:58:10 -0700 Subject: [PATCH 053/771] [X86] Add CPU name strings to getIntelProcessorTypeAndSubtype and 
getAMDProcessorTypeAndSubtype in compiler-rt. These aren't used in compiler-rt, but I plan to make a similar change to the equivalent code in Host.cpp where the mapping from type/subtype is an unnecessary complication. Having the CPU strings here will help keep the code somewhat synchronized. --- compiler-rt/lib/builtins/cpu_model.c | 125 ++++++++++++++++++--------- 1 file changed, 85 insertions(+), 40 deletions(-) diff --git a/compiler-rt/lib/builtins/cpu_model.c b/compiler-rt/lib/builtins/cpu_model.c index 042657232d8e8..8346bb62dcfb4 100644 --- a/compiler-rt/lib/builtins/cpu_model.c +++ b/compiler-rt/lib/builtins/cpu_model.c @@ -272,12 +272,17 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family, } } -static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, - const unsigned *Features, - unsigned *Type, unsigned *Subtype) { +static const char * +getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, + const unsigned *Features, + unsigned *Type, unsigned *Subtype) { #define testFeature(F) \ (Features[F / 32] & (F % 32)) != 0 + // We select CPU strings to match the code in Host.cpp, but we don't use them + // in compiler-rt. + const char *CPU = 0; + switch (Family) { case 6: switch (Model) { @@ -288,13 +293,17 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // 0Fh. All processors are manufactured using the 65 nm process. case 0x16: // Intel Celeron processor model 16h. All processors are // manufactured using the 65 nm process + CPU = "core2"; + *Type = INTEL_CORE2; + break; case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model // 17h. All processors are manufactured using the 45 nm process. // // 45nm: Penryn , Wolfdale, Yorkfield (XE) case 0x1d: // Intel Xeon processor MP. All processors are manufactured using // the 45 nm process. 
- *Type = INTEL_CORE2; // "penryn" + CPU = "penryn"; + *Type = INTEL_CORE2; break; case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 45 nm process. @@ -302,25 +311,29 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // As found in a Summer 2010 model iMac. case 0x1f: case 0x2e: // Nehalem EX - *Type = INTEL_COREI7; // "nehalem" + CPU = "nehalem"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_NEHALEM; break; case 0x25: // Intel Core i7, laptop version. case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 32 nm process. case 0x2f: // Westmere EX - *Type = INTEL_COREI7; // "westmere" + CPU = "westmere"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_WESTMERE; break; case 0x2a: // Intel Core i7 processor. All processors are manufactured // using the 32 nm process. case 0x2d: - *Type = INTEL_COREI7; //"sandybridge" + CPU = "sandybridge"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_SANDYBRIDGE; break; case 0x3a: case 0x3e: // Ivy Bridge EP - *Type = INTEL_COREI7; // "ivybridge" + CPU = "ivybridge"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_IVYBRIDGE; break; @@ -329,7 +342,8 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x3f: case 0x45: case 0x46: - *Type = INTEL_COREI7; // "haswell" + CPU = "haswell"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_HASWELL; break; @@ -338,7 +352,8 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x47: case 0x4f: case 0x56: - *Type = INTEL_COREI7; // "broadwell" + CPU = "broadwell"; + *Type = INTEL_COREI7; *Subtype = INTEL_COREI7_BROADWELL; break; @@ -349,39 +364,47 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x9e: // Kaby Lake desktop case 0xa5: // Comet Lake-H/S case 0xa6: // Comet Lake-U - *Type = INTEL_COREI7; // "skylake" + CPU = "skylake"; + *Type = INTEL_COREI7; 
*Subtype = INTEL_COREI7_SKYLAKE; break; // Skylake Xeon: case 0x55: *Type = INTEL_COREI7; - if (testFeature(FEATURE_AVX512BF16)) - *Subtype = INTEL_COREI7_COOPERLAKE; // "cooperlake" - else if (testFeature(FEATURE_AVX512VNNI)) - *Subtype = INTEL_COREI7_CASCADELAKE; // "cascadelake" - else - *Subtype = INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" + if (testFeature(FEATURE_AVX512BF16)) { + CPU = "cooperlake"; + *Subtype = INTEL_COREI7_COOPERLAKE; + } else if (testFeature(FEATURE_AVX512VNNI)) { + CPU = "cascadelake"; + *Subtype = INTEL_COREI7_CASCADELAKE; + } else { + CPU = "skylake-avx512"; + *Subtype = INTEL_COREI7_SKYLAKE_AVX512; + } break; // Cannonlake: case 0x66: + CPU = "cannonlake"; *Type = INTEL_COREI7; - *Subtype = INTEL_COREI7_CANNONLAKE; // "cannonlake" + *Subtype = INTEL_COREI7_CANNONLAKE; break; // Icelake: case 0x7d: case 0x7e: + CPU = "icelake-client"; *Type = INTEL_COREI7; - *Subtype = INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client" + *Subtype = INTEL_COREI7_ICELAKE_CLIENT; break; // Icelake Xeon: case 0x6a: case 0x6c: + CPU = "icelake-server"; *Type = INTEL_COREI7; - *Subtype = INTEL_COREI7_ICELAKE_SERVER; // "icelake-server" + *Subtype = INTEL_COREI7_ICELAKE_SERVER; break; case 0x1c: // Most 45 nm Intel Atom processors @@ -389,8 +412,9 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x27: // 32 nm Atom Medfield case 0x35: // 32 nm Atom Midview case 0x36: // 32 nm Atom Midview + CPU = "bonnell"; *Type = INTEL_BONNELL; - break; // "bonnell" + break; // Atom Silvermont codes from the Intel software optimization guide. 
case 0x37: @@ -399,26 +423,32 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x5a: case 0x5d: case 0x4c: // really airmont + CPU = "silvermont"; *Type = INTEL_SILVERMONT; - break; // "silvermont" + break; // Goldmont: case 0x5c: // Apollo Lake case 0x5f: // Denverton + CPU = "goldmont"; *Type = INTEL_GOLDMONT; break; // "goldmont" case 0x7a: + CPU = "goldmont-plus"; *Type = INTEL_GOLDMONT_PLUS; break; case 0x86: + CPU = "tremont"; *Type = INTEL_TREMONT; break; case 0x57: - *Type = INTEL_KNL; // knl + CPU = "knl"; + *Type = INTEL_KNL; break; case 0x85: - *Type = INTEL_KNM; // knm + CPU = "knm"; + *Type = INTEL_KNM; break; default: // Unknown family 6 CPU. @@ -428,17 +458,22 @@ static void getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, default: break; // Unknown. } + + return CPU; } -static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, - const unsigned *Features, - unsigned *Type, unsigned *Subtype) { - // FIXME: this poorly matches the generated SubtargetFeatureKV table. There - // appears to be no way to generate the wide variety of AMD-specific targets - // from the information returned from CPUID. +static const char * +getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, + const unsigned *Features, + unsigned *Type, unsigned *Subtype) { + // We select CPU strings to match the code in Host.cpp, but we don't use them + // in compiler-rt. 
+ const char *CPU = 0; + switch (Family) { case 16: - *Type = AMDFAM10H; // "amdfam10" + CPU = "amdfam10"; + *Type = AMDFAM10H; switch (Model) { case 2: *Subtype = AMDFAM10H_BARCELONA; @@ -452,44 +487,54 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, } break; case 20: + CPU = "btver1"; *Type = AMD_BTVER1; - break; // "btver1"; + break; case 21: + CPU = "bdver1"; *Type = AMDFAM15H; if (Model >= 0x60 && Model <= 0x7f) { + CPU = "bdver4"; *Subtype = AMDFAM15H_BDVER4; - break; // "bdver4"; 60h-7Fh: Excavator + break; // 60h-7Fh: Excavator } if (Model >= 0x30 && Model <= 0x3f) { + CPU = "bdver3"; *Subtype = AMDFAM15H_BDVER3; - break; // "bdver3"; 30h-3Fh: Steamroller + break; // 30h-3Fh: Steamroller } if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { + CPU = "bdver2"; *Subtype = AMDFAM15H_BDVER2; - break; // "bdver2"; 02h, 10h-1Fh: Piledriver + break; // 02h, 10h-1Fh: Piledriver } if (Model <= 0x0f) { *Subtype = AMDFAM15H_BDVER1; - break; // "bdver1"; 00h-0Fh: Bulldozer + break; // 00h-0Fh: Bulldozer } break; case 22: + CPU = "btver2"; *Type = AMD_BTVER2; - break; // "btver2" + break; case 23: + CPU = "znver1"; *Type = AMDFAM17H; if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { + CPU = "znver2"; *Subtype = AMDFAM17H_ZNVER2; - break; // "znver2"; 30h-3fh, 71h: Zen2 + break; // 30h-3fh, 71h: Zen2 } if (Model <= 0x0f) { *Subtype = AMDFAM17H_ZNVER1; - break; // "znver1"; 00h-0Fh: Zen1 + break; // 00h-0Fh: Zen1 } break; default: - break; // "generic" + break; // Unknown AMD CPU. 
} + + return CPU; } static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, From ea84dc9500df383b4fe07199134033f358411e59 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 12 Jul 2020 12:58:17 -0700 Subject: [PATCH 054/771] [X86] Add CPU string output to getIntelProcessorTypeAndSubtype/getAMDProcessorTypeAndSubtype in Host.cpp Rather than converting type/subtype into strings, just directly select the string as part of family/model decoding. This avoids the need for creating fake Type/SubTypes for CPUs not supported by compiler-rtl. I've left the Type/SubType in place where it matches compiler-rt so that the code can be diffed, but the Type/SubType is no longer used by Host.cpp. compiler-rt was already updated to select strings that aren't used so the code will look similar. --- llvm/include/llvm/Support/X86TargetParser.def | 26 -- llvm/lib/Support/Host.cpp | 366 ++++++++---------- 2 files changed, 170 insertions(+), 222 deletions(-) diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index 4b96c66b0e290..9e9f0985d15ea 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -48,25 +48,6 @@ X86_CPU_TYPE_COMPAT("knm", INTEL_KNM, "knm") X86_CPU_TYPE_COMPAT("goldmont", INTEL_GOLDMONT, "goldmont") X86_CPU_TYPE_COMPAT("goldmont-plus", INTEL_GOLDMONT_PLUS, "goldmont-plus") X86_CPU_TYPE_COMPAT("tremont", INTEL_TREMONT, "tremont") -// Entries below this are not in libgcc/compiler-rt. 
-X86_CPU_TYPE ("i386", INTEL_i386) -X86_CPU_TYPE ("i486", INTEL_i486) -X86_CPU_TYPE ("pentium", INTEL_PENTIUM) -X86_CPU_TYPE ("pentium-mmx", INTEL_PENTIUM_MMX) -X86_CPU_TYPE ("pentiumpro", INTEL_PENTIUM_PRO) -X86_CPU_TYPE ("pentium2", INTEL_PENTIUM_II) -X86_CPU_TYPE ("pentium3", INTEL_PENTIUM_III) -X86_CPU_TYPE ("pentium4", INTEL_PENTIUM_IV) -X86_CPU_TYPE ("pentium-m", INTEL_PENTIUM_M) -X86_CPU_TYPE ("yonah", INTEL_CORE_DUO) -X86_CPU_TYPE ("nocona", INTEL_NOCONA) -X86_CPU_TYPE ("prescott", INTEL_PRESCOTT) -X86_CPU_TYPE ("i486", AMD_i486) -X86_CPU_TYPE ("pentium", AMDPENTIUM) -X86_CPU_TYPE ("athlon", AMD_ATHLON) -X86_CPU_TYPE ("athlon-xp", AMD_ATHLON_XP) -X86_CPU_TYPE ("k8", AMD_K8) -X86_CPU_TYPE ("k8-sse3", AMD_K8SSE3) // Alternate names supported by __builtin_cpu_is and target multiversioning. X86_CPU_TYPE_COMPAT_ALIAS(INTEL_BONNELL, "atom") @@ -112,13 +93,6 @@ X86_CPU_SUBTYPE_COMPAT("znver2", AMDFAM17H_ZNVER2, "znver2") X86_CPU_SUBTYPE_COMPAT("cascadelake", INTEL_COREI7_CASCADELAKE, "cascadelake") X86_CPU_SUBTYPE_COMPAT("tigerlake", INTEL_COREI7_TIGERLAKE, "tigerlake") X86_CPU_SUBTYPE_COMPAT("cooperlake", INTEL_COREI7_COOPERLAKE, "cooperlake") -// Entries below this are not in libgcc/compiler-rt. 
-X86_CPU_SUBTYPE ("core2", INTEL_CORE2_65) -X86_CPU_SUBTYPE ("penryn", INTEL_CORE2_45) -X86_CPU_SUBTYPE ("k6", AMDPENTIUM_K6) -X86_CPU_SUBTYPE ("k6-2", AMDPENTIUM_K62) -X86_CPU_SUBTYPE ("k6-3", AMDPENTIUM_K63) -X86_CPU_SUBTYPE ("geode", AMDPENTIUM_GEODE) #undef X86_CPU_SUBTYPE_COMPAT #undef X86_CPU_SUBTYPE diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 8dc8c4e9775ac..362b5850b394a 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -583,7 +583,7 @@ static void detectX86FamilyModel(unsigned EAX, unsigned *Family, } } -static void +static StringRef getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, const unsigned *Features, unsigned *Type, unsigned *Subtype) { @@ -591,31 +591,33 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, return (Features[F / 32] & (1U << (F % 32))) != 0; }; + StringRef CPU; + switch (Family) { case 3: - *Type = X86::INTEL_i386; + CPU = "i386"; break; case 4: - *Type = X86::INTEL_i486; + CPU = "i486"; break; case 5: if (testFeature(X86::FEATURE_MMX)) { - *Type = X86::INTEL_PENTIUM_MMX; + CPU = "pentium-mmx"; break; } - *Type = X86::INTEL_PENTIUM; + CPU = "pentium"; break; case 6: switch (Model) { case 0x01: // Pentium Pro processor - *Type = X86::INTEL_PENTIUM_PRO; + CPU = "pentiumpro"; break; case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, // model 03 case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, // model 05, and Intel Celeron processor, model 05 case 0x06: // Celeron processor, model 06 - *Type = X86::INTEL_PENTIUM_II; + CPU = "pentium2"; break; case 0x07: // Pentium III processor, model 07, and Pentium III Xeon // processor, model 07 @@ -623,19 +625,19 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // model 08, and Celeron processor, model 08 case 0x0a: // Pentium III Xeon processor, model 0Ah case 0x0b: // Pentium III processor, model 0Bh - *Type = X86::INTEL_PENTIUM_III; + CPU = 
"pentium3"; break; case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09. case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model // 0Dh. All processors are manufactured using the 90 nm process. case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 // Integrated Processor with Intel QuickAssist Technology - *Type = X86::INTEL_PENTIUM_M; + CPU = "pentium-m"; break; case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model // 0Eh. All processors are manufactured using the 65 nm process. - *Type = X86::INTEL_CORE_DUO; - break; // yonah + CPU = "yonah"; + break; case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile // processor, Intel Core 2 Quad processor, Intel Core 2 Quad // mobile processor, Intel Core 2 Extreme processor, Intel @@ -643,8 +645,8 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // 0Fh. All processors are manufactured using the 65 nm process. case 0x16: // Intel Celeron processor model 16h. All processors are // manufactured using the 65 nm process - *Type = X86::INTEL_CORE2; // "core2" - *Subtype = X86::INTEL_CORE2_65; + CPU = "core2"; + *Type = X86::INTEL_CORE2; break; case 0x17: // Intel Core 2 Extreme processor, Intel Xeon processor, model // 17h. All processors are manufactured using the 45 nm process. @@ -652,34 +654,38 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, // 45nm: Penryn , Wolfdale, Yorkfield (XE) case 0x1d: // Intel Xeon processor MP. All processors are manufactured using // the 45 nm process. - *Type = X86::INTEL_CORE2; // "penryn" - *Subtype = X86::INTEL_CORE2_45; + CPU = "penryn"; + *Type = X86::INTEL_CORE2; break; case 0x1a: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 45 nm process. case 0x1e: // Intel(R) Core(TM) i7 CPU 870 @ 2.93GHz. // As found in a Summer 2010 model iMac. 
case 0x1f: - case 0x2e: // Nehalem EX - *Type = X86::INTEL_COREI7; // "nehalem" + case 0x2e: // Nehalem EX + CPU = "nehalem"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_NEHALEM; break; case 0x25: // Intel Core i7, laptop version. case 0x2c: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 32 nm process. case 0x2f: // Westmere EX - *Type = X86::INTEL_COREI7; // "westmere" + CPU = "westmere"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_WESTMERE; break; case 0x2a: // Intel Core i7 processor. All processors are manufactured // using the 32 nm process. case 0x2d: - *Type = X86::INTEL_COREI7; //"sandybridge" + CPU = "sandybridge"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; break; case 0x3a: - case 0x3e: // Ivy Bridge EP - *Type = X86::INTEL_COREI7; // "ivybridge" + case 0x3e: // Ivy Bridge EP + CPU = "ivybridge"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_IVYBRIDGE; break; @@ -688,7 +694,8 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x3f: case 0x45: case 0x46: - *Type = X86::INTEL_COREI7; // "haswell" + CPU = "haswell"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_HASWELL; break; @@ -697,7 +704,8 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x47: case 0x4f: case 0x56: - *Type = X86::INTEL_COREI7; // "broadwell" + CPU = "broadwell"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_BROADWELL; break; @@ -708,39 +716,47 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x9e: // Kaby Lake desktop case 0xa5: // Comet Lake-H/S case 0xa6: // Comet Lake-U - *Type = X86::INTEL_COREI7; // "skylake" + CPU = "skylake"; + *Type = X86::INTEL_COREI7; *Subtype = X86::INTEL_COREI7_SKYLAKE; break; // Skylake Xeon: case 0x55: *Type = X86::INTEL_COREI7; - if (testFeature(X86::FEATURE_AVX512BF16)) - *Subtype = X86::INTEL_COREI7_COOPERLAKE; // "cooperlake" - else if 
(testFeature(X86::FEATURE_AVX512VNNI)) - *Subtype = X86::INTEL_COREI7_CASCADELAKE; // "cascadelake" - else - *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; // "skylake-avx512" + if (testFeature(X86::FEATURE_AVX512BF16)) { + CPU = "cooperlake"; + *Subtype = X86::INTEL_COREI7_COOPERLAKE; + } else if (testFeature(X86::FEATURE_AVX512VNNI)) { + CPU = "cascadelake"; + *Subtype = X86::INTEL_COREI7_CASCADELAKE; + } else { + CPU = "skylake-avx512"; + *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; + } break; // Cannonlake: case 0x66: + CPU = "cannonlake"; *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_CANNONLAKE; // "cannonlake" + *Subtype = X86::INTEL_COREI7_CANNONLAKE; break; // Icelake: case 0x7d: case 0x7e: + CPU = "icelake-client"; *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; // "icelake-client" + *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; break; // Icelake Xeon: case 0x6a: case 0x6c: + CPU = "icelake-server"; *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; // "icelake-server" + *Subtype = X86::INTEL_COREI7_ICELAKE_SERVER; break; case 0x1c: // Most 45 nm Intel Atom processors @@ -748,8 +764,9 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x27: // 32 nm Atom Medfield case 0x35: // 32 nm Atom Midview case 0x36: // 32 nm Atom Midview + CPU = "bonnell"; *Type = X86::INTEL_BONNELL; - break; // "bonnell" + break; // Atom Silvermont codes from the Intel software optimization guide. 
case 0x37: @@ -758,14 +775,17 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, case 0x5a: case 0x5d: case 0x4c: // really airmont + CPU = "silvermont"; *Type = X86::INTEL_SILVERMONT; - break; // "silvermont" + break; // Goldmont: case 0x5c: // Apollo Lake case 0x5f: // Denverton + CPU = "goldmont"; *Type = X86::INTEL_GOLDMONT; - break; // "goldmont" + break; case 0x7a: + CPU = "goldmont-plus"; *Type = X86::INTEL_GOLDMONT_PLUS; break; case 0x86: @@ -773,193 +793,140 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, break; case 0x57: - *Type = X86::INTEL_KNL; // knl + CPU = "tremont"; + *Type = X86::INTEL_KNL; break; case 0x85: - *Type = X86::INTEL_KNM; // knm + CPU = "knm"; + *Type = X86::INTEL_KNM; break; default: // Unknown family 6 CPU, try to guess. + // Don't both with Type/Subtype here, they aren't used by the caller. + // They're used above to keep the code in sync with compiler-rt. // TODO detect tigerlake host from model if (testFeature(X86::FEATURE_AVX512VP2INTERSECT)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_TIGERLAKE; - break; - } - - if (testFeature(X86::FEATURE_AVX512VBMI2)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_ICELAKE_CLIENT; - break; - } - - if (testFeature(X86::FEATURE_AVX512VBMI)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_CANNONLAKE; - break; - } - - if (testFeature(X86::FEATURE_AVX512BF16)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_COOPERLAKE; - break; - } - - if (testFeature(X86::FEATURE_AVX512VNNI)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_CASCADELAKE; - break; - } - - if (testFeature(X86::FEATURE_AVX512VL)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_SKYLAKE_AVX512; - break; - } - - if (testFeature(X86::FEATURE_AVX512ER)) { - *Type = X86::INTEL_KNL; // knl - break; - } - - if (testFeature(X86::FEATURE_CLFLUSHOPT)) { - if (testFeature(X86::FEATURE_SHA)) { - *Type = X86::INTEL_GOLDMONT; 
- } else { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_SKYLAKE; - } - break; - } - if (testFeature(X86::FEATURE_ADX)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_BROADWELL; - break; - } - if (testFeature(X86::FEATURE_AVX2)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_HASWELL; - break; - } - if (testFeature(X86::FEATURE_AVX)) { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_SANDYBRIDGE; - break; - } - if (testFeature(X86::FEATURE_SSE4_2)) { - if (testFeature(X86::FEATURE_MOVBE)) { - *Type = X86::INTEL_SILVERMONT; - } else { - *Type = X86::INTEL_COREI7; - *Subtype = X86::INTEL_COREI7_NEHALEM; - } - break; - } - if (testFeature(X86::FEATURE_SSE4_1)) { - *Type = X86::INTEL_CORE2; // "penryn" - *Subtype = X86::INTEL_CORE2_45; - break; - } - if (testFeature(X86::FEATURE_SSSE3)) { - if (testFeature(X86::FEATURE_MOVBE)) { - *Type = X86::INTEL_BONNELL; // "bonnell" - } else { - *Type = X86::INTEL_CORE2; // "core2" - *Subtype = X86::INTEL_CORE2_65; - } - break; - } - if (testFeature(X86::FEATURE_64BIT)) { - *Type = X86::INTEL_CORE2; // "core2" - *Subtype = X86::INTEL_CORE2_65; - break; - } - if (testFeature(X86::FEATURE_SSE3)) { - *Type = X86::INTEL_CORE_DUO; - break; + CPU = "tigerlake"; + } else if (testFeature(X86::FEATURE_AVX512VBMI2)) { + CPU = "icelake-client"; + } else if (testFeature(X86::FEATURE_AVX512VBMI)) { + CPU = "cannonlake"; + } else if (testFeature(X86::FEATURE_AVX512BF16)) { + CPU = "cooperlake"; + } else if (testFeature(X86::FEATURE_AVX512VNNI)) { + CPU = "cascadelake"; + } else if (testFeature(X86::FEATURE_AVX512VL)) { + CPU = "skylake-avx512"; + } else if (testFeature(X86::FEATURE_AVX512ER)) { + CPU = "knl"; + } else if (testFeature(X86::FEATURE_CLFLUSHOPT)) { + if (testFeature(X86::FEATURE_SHA)) + CPU = "goldmont"; + else + CPU = "skylake"; + } else if (testFeature(X86::FEATURE_ADX)) { + CPU = "broadwell"; + } else if (testFeature(X86::FEATURE_AVX2)) { + CPU = "haswell"; + } else if 
(testFeature(X86::FEATURE_AVX)) { + CPU = "sandybridge"; + } else if (testFeature(X86::FEATURE_SSE4_2)) { + if (testFeature(X86::FEATURE_MOVBE)) + CPU = "silvermont"; + else + CPU = "nehalem"; + } else if (testFeature(X86::FEATURE_SSE4_1)) { + CPU = "penryn"; + } else if (testFeature(X86::FEATURE_SSSE3)) { + if (testFeature(X86::FEATURE_MOVBE)) + CPU = "bonnell"; + else + CPU = "core2"; + } else if (testFeature(X86::FEATURE_64BIT)) { + CPU = "core2"; + } else if (testFeature(X86::FEATURE_SSE3)) { + CPU = "yonah"; + } else if (testFeature(X86::FEATURE_SSE2)) { + CPU = "pentium-m"; + } else if (testFeature(X86::FEATURE_SSE)) { + CPU = "pentium3"; + } else if (testFeature(X86::FEATURE_MMX)) { + CPU = "pentium2"; + } else { + CPU = "pentiumpro"; } - if (testFeature(X86::FEATURE_SSE2)) { - *Type = X86::INTEL_PENTIUM_M; - break; - } - if (testFeature(X86::FEATURE_SSE)) { - *Type = X86::INTEL_PENTIUM_III; - break; - } - if (testFeature(X86::FEATURE_MMX)) { - *Type = X86::INTEL_PENTIUM_II; - break; - } - *Type = X86::INTEL_PENTIUM_PRO; break; } break; case 15: { if (testFeature(X86::FEATURE_64BIT)) { - *Type = X86::INTEL_NOCONA; + CPU = "nocona"; break; } if (testFeature(X86::FEATURE_SSE3)) { - *Type = X86::INTEL_PRESCOTT; + CPU = "prescott"; break; } - *Type = X86::INTEL_PENTIUM_IV; + CPU = "pentium4"; break; } default: - break; /*"generic"*/ + break; // Unknown. } + + return CPU; } -static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, - const unsigned *Features, - unsigned *Type, unsigned *Subtype) { +static StringRef +getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, + const unsigned *Features, + unsigned *Type, unsigned *Subtype) { auto testFeature = [&](unsigned F) { return (Features[F / 32] & (1U << (F % 32))) != 0; }; - // FIXME: this poorly matches the generated SubtargetFeatureKV table. There - // appears to be no way to generate the wide variety of AMD-specific targets - // from the information returned from CPUID. 
+ StringRef CPU; + switch (Family) { case 4: - *Type = X86::AMD_i486; + CPU = "i486"; break; case 5: - *Type = X86::AMDPENTIUM; + CPU = "pentium"; switch (Model) { case 6: case 7: - *Subtype = X86::AMDPENTIUM_K6; - break; // "k6" + CPU = "k6"; + break; case 8: - *Subtype = X86::AMDPENTIUM_K62; - break; // "k6-2" + CPU = "k6-2"; + break; case 9: case 13: - *Subtype = X86::AMDPENTIUM_K63; - break; // "k6-3" + CPU = "k6-3"; + break; case 10: - *Subtype = X86::AMDPENTIUM_GEODE; - break; // "geode" + CPU = "geode"; + break; } break; case 6: if (testFeature(X86::FEATURE_SSE)) { - *Type = X86::AMD_ATHLON_XP; - break; // "athlon-xp" + CPU = "athlon-xp"; + break; } - *Type = X86::AMD_ATHLON; - break; // "athlon" + CPU = "athlon"; + break; case 15: if (testFeature(X86::FEATURE_SSE3)) { - *Type = X86::AMD_K8SSE3; - break; // "k8-sse3" + CPU = "k8-sse3"; + break; } - *Type = X86::AMD_K8; - break; // "k8" + CPU = "k8"; + break; case 16: + CPU = "amdfam10"; *Type = X86::AMDFAM10H; // "amdfam10" switch (Model) { case 2: @@ -974,44 +941,54 @@ static void getAMDProcessorTypeAndSubtype(unsigned Family, unsigned Model, } break; case 20: + CPU = "btver1"; *Type = X86::AMD_BTVER1; - break; // "btver1"; + break; case 21: + CPU = "bdver1"; *Type = X86::AMDFAM15H; if (Model >= 0x60 && Model <= 0x7f) { + CPU = "bdver4"; *Subtype = X86::AMDFAM15H_BDVER4; - break; // "bdver4"; 60h-7Fh: Excavator + break; // 60h-7Fh: Excavator } if (Model >= 0x30 && Model <= 0x3f) { + CPU = "bdver3"; *Subtype = X86::AMDFAM15H_BDVER3; - break; // "bdver3"; 30h-3Fh: Steamroller + break; // 30h-3Fh: Steamroller } if ((Model >= 0x10 && Model <= 0x1f) || Model == 0x02) { + CPU = "bdver2"; *Subtype = X86::AMDFAM15H_BDVER2; - break; // "bdver2"; 02h, 10h-1Fh: Piledriver + break; // 02h, 10h-1Fh: Piledriver } if (Model <= 0x0f) { *Subtype = X86::AMDFAM15H_BDVER1; - break; // "bdver1"; 00h-0Fh: Bulldozer + break; // 00h-0Fh: Bulldozer } break; case 22: + CPU = "btver2"; *Type = X86::AMD_BTVER2; - break; // "btver2" + 
break; case 23: + CPU = "znver1"; *Type = X86::AMDFAM17H; if ((Model >= 0x30 && Model <= 0x3f) || Model == 0x71) { + CPU = "znver2"; *Subtype = X86::AMDFAM17H_ZNVER2; - break; // "znver2"; 30h-3fh, 71h: Zen2 + break; // 30h-3fh, 71h: Zen2 } if (Model <= 0x0f) { *Subtype = X86::AMDFAM17H_ZNVER1; - break; // "znver1"; 00h-0Fh: Zen1 + break; // 00h-0Fh: Zen1 } break; default: - break; // "generic" + break; // Unknown AMD CPU. } + + return CPU; } static void getAvailableFeatures(unsigned ECX, unsigned EDX, unsigned MaxLeaf, @@ -1161,26 +1138,23 @@ StringRef sys::getHostCPUName() { detectX86FamilyModel(EAX, &Family, &Model); getAvailableFeatures(ECX, EDX, MaxLeaf, Features); + // These aren't consumed in this file, but we try to keep some source code the + // same or similar to compiler-rt. unsigned Type = 0; unsigned Subtype = 0; + StringRef CPU; + if (Vendor == SIG_INTEL) { - getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); + CPU = getIntelProcessorTypeAndSubtype(Family, Model, Features, &Type, + &Subtype); } else if (Vendor == SIG_AMD) { - getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, &Subtype); + CPU = getAMDProcessorTypeAndSubtype(Family, Model, Features, &Type, + &Subtype); } - // Check subtypes first since those are more specific. -#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ - if (Subtype == X86::ENUM) \ - return ARCHNAME; -#include "llvm/Support/X86TargetParser.def" - - // Now check types. -#define X86_CPU_TYPE(ARCHNAME, ENUM) \ - if (Type == X86::ENUM) \ - return ARCHNAME; -#include "llvm/Support/X86TargetParser.def" + if (!CPU.empty()) + return CPU; return "generic"; } From 90c577a113e97212e02d5956d6db45e701e3552f Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 12 Jul 2020 12:58:23 -0700 Subject: [PATCH 055/771] [X86] Remove model number based detection for 'pentiumpro', 'pentium2', 'pentium3', 'pentium-m', and 'yonah' from getHostCPUName. 
For model 6 CPUs, we have a fallback detection method based on available features. That mechanism should be enough to detect these early family 6 CPUs as they only differ in the features used by the detection anyway. --- llvm/lib/Support/Host.cpp | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/llvm/lib/Support/Host.cpp b/llvm/lib/Support/Host.cpp index 362b5850b394a..658c1ee74cfec 100644 --- a/llvm/lib/Support/Host.cpp +++ b/llvm/lib/Support/Host.cpp @@ -609,35 +609,6 @@ getIntelProcessorTypeAndSubtype(unsigned Family, unsigned Model, break; case 6: switch (Model) { - case 0x01: // Pentium Pro processor - CPU = "pentiumpro"; - break; - case 0x03: // Intel Pentium II OverDrive processor, Pentium II processor, - // model 03 - case 0x05: // Pentium II processor, model 05, Pentium II Xeon processor, - // model 05, and Intel Celeron processor, model 05 - case 0x06: // Celeron processor, model 06 - CPU = "pentium2"; - break; - case 0x07: // Pentium III processor, model 07, and Pentium III Xeon - // processor, model 07 - case 0x08: // Pentium III processor, model 08, Pentium III Xeon processor, - // model 08, and Celeron processor, model 08 - case 0x0a: // Pentium III Xeon processor, model 0Ah - case 0x0b: // Pentium III processor, model 0Bh - CPU = "pentium3"; - break; - case 0x09: // Intel Pentium M processor, Intel Celeron M processor model 09. - case 0x0d: // Intel Pentium M processor, Intel Celeron M processor, model - // 0Dh. All processors are manufactured using the 90 nm process. - case 0x15: // Intel EP80579 Integrated Processor and Intel EP80579 - // Integrated Processor with Intel QuickAssist Technology - CPU = "pentium-m"; - break; - case 0x0e: // Intel Core Duo processor, Intel Core Solo processor, model - // 0Eh. All processors are manufactured using the 65 nm process. 
- CPU = "yonah"; - break; case 0x0f: // Intel Core 2 Duo processor, Intel Core 2 Duo mobile // processor, Intel Core 2 Quad processor, Intel Core 2 Quad // mobile processor, Intel Core 2 Extreme processor, Intel From 572c2905aeaef00a6fedfc4c54f21856ba4cc34e Mon Sep 17 00:00:00 2001 From: River Riddle Date: Sun, 12 Jul 2020 14:11:39 -0700 Subject: [PATCH 056/771] [mlir][ODS] Add support for specifying the namespace of an interface. The namespace can be specified using the `cppNamespace` field. This matches the functionality already present on dialects, enums, etc. This fixes problems with using interfaces on operations in a different namespace than the interface was defined in. Differential Revision: https://reviews.llvm.org/D83604 --- mlir/include/mlir/IR/OpAsmInterface.td | 1 + mlir/include/mlir/IR/OpBase.td | 7 +++++++ mlir/include/mlir/IR/OpImplementation.h | 3 +-- mlir/include/mlir/IR/SymbolInterfaces.td | 1 + mlir/include/mlir/IR/SymbolTable.h | 3 +-- mlir/include/mlir/Interfaces/CallInterfaces.h | 3 ++- mlir/include/mlir/Interfaces/CallInterfaces.td | 2 ++ .../mlir/Interfaces/ControlFlowInterfaces.h | 13 +++++++------ .../mlir/Interfaces/ControlFlowInterfaces.td | 4 ++++ mlir/include/mlir/Interfaces/CopyOpInterface.h | 5 +---- mlir/include/mlir/Interfaces/CopyOpInterface.td | 1 + .../mlir/Interfaces/DerivedAttributeOpInterface.h | 3 +-- .../Interfaces/DerivedAttributeOpInterface.td | 1 + .../mlir/Interfaces/InferTypeOpInterface.h | 5 +++-- .../mlir/Interfaces/InferTypeOpInterface.td | 2 ++ mlir/include/mlir/Interfaces/LoopLikeInterface.h | 5 +---- mlir/include/mlir/Interfaces/LoopLikeInterface.td | 1 + .../mlir/Interfaces/SideEffectInterfaces.h | 14 +++++++------- .../mlir/Interfaces/SideEffectInterfaces.td | 7 +++++++ .../mlir/Interfaces/VectorUnrollInterface.h | 5 +---- .../mlir/Interfaces/VectorUnrollInterface.td | 1 + mlir/include/mlir/Interfaces/ViewLikeInterface.h | 5 +---- mlir/include/mlir/Interfaces/ViewLikeInterface.td | 1 + 
mlir/include/mlir/TableGen/Interfaces.h | 3 +++ mlir/include/mlir/TableGen/OpTrait.h | 2 +- mlir/include/mlir/TableGen/SideEffects.h | 2 +- mlir/lib/TableGen/Interfaces.cpp | 5 +++++ mlir/lib/TableGen/OpTrait.cpp | 9 ++++++--- mlir/lib/TableGen/Operator.cpp | 2 +- mlir/lib/TableGen/SideEffects.cpp | 8 ++++++-- mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 7 ++++--- mlir/tools/mlir-tblgen/OpInterfacesGen.cpp | 15 ++++++++++++++- 32 files changed, 96 insertions(+), 50 deletions(-) diff --git a/mlir/include/mlir/IR/OpAsmInterface.td b/mlir/include/mlir/IR/OpAsmInterface.td index 752536a9e9a10..ec50288348c4c 100644 --- a/mlir/include/mlir/IR/OpAsmInterface.td +++ b/mlir/include/mlir/IR/OpAsmInterface.td @@ -22,6 +22,7 @@ def OpAsmOpInterface : OpInterface<"OpAsmOpInterface"> { This interface provides hooks to interact with the AsmPrinter and AsmParser classes. }]; + let cppNamespace = "::mlir"; let methods = [ InterfaceMethod<[{ diff --git a/mlir/include/mlir/IR/OpBase.td b/mlir/include/mlir/IR/OpBase.td index 4344d075bc34c..9cc57a6172894 100644 --- a/mlir/include/mlir/IR/OpBase.td +++ b/mlir/include/mlir/IR/OpBase.td @@ -1803,6 +1803,12 @@ class Interface { // The name given to the c++ interface class. string cppClassName = name; + // The C++ namespace that this interface should be placed into. + // + // To specify nested namespaces, use "::" as the delimiter, e.g., given + // "A::B", ops will be placed in `namespace A { namespace B { } }`. + string cppNamespace = ""; + // The list of methods defined by this interface. 
list methods = []; @@ -1838,6 +1844,7 @@ class DeclareOpInterfaceMethods { let description = interface.description; let cppClassName = interface.cppClassName; + let cppNamespace = interface.cppNamespace; let methods = interface.methods; // This field contains a set of method names that should always have their diff --git a/mlir/include/mlir/IR/OpImplementation.h b/mlir/include/mlir/IR/OpImplementation.h index 126d20eacbe49..20660be4347c1 100644 --- a/mlir/include/mlir/IR/OpImplementation.h +++ b/mlir/include/mlir/IR/OpImplementation.h @@ -764,6 +764,7 @@ class OpAsmDialectInterface virtual void getAsmBlockArgumentNames(Block *block, OpAsmSetValueNameFn setNameFn) const {} }; +} // end namespace mlir //===--------------------------------------------------------------------===// // Operation OpAsm interface. @@ -772,6 +773,4 @@ class OpAsmDialectInterface /// The OpAsmOpInterface, see OpAsmInterface.td for more details. #include "mlir/IR/OpAsmInterface.h.inc" -} // end namespace mlir - #endif diff --git a/mlir/include/mlir/IR/SymbolInterfaces.td b/mlir/include/mlir/IR/SymbolInterfaces.td index 86b33aa36a607..148551324868a 100644 --- a/mlir/include/mlir/IR/SymbolInterfaces.td +++ b/mlir/include/mlir/IR/SymbolInterfaces.td @@ -27,6 +27,7 @@ def Symbol : OpInterface<"SymbolOpInterface"> { See [Symbols and SymbolTables](SymbolsAndSymbolTables.md) for more details and constraints on `Symbol` operations. }]; + let cppNamespace = "::mlir"; let methods = [ InterfaceMethod<"Returns the name of this symbol.", diff --git a/mlir/include/mlir/IR/SymbolTable.h b/mlir/include/mlir/IR/SymbolTable.h index 0b035836ec61d..7e52011f81ffb 100644 --- a/mlir/include/mlir/IR/SymbolTable.h +++ b/mlir/include/mlir/IR/SymbolTable.h @@ -252,10 +252,9 @@ class SymbolTable : public TraitBase { }; } // end namespace OpTrait +} // end namespace mlir /// Include the generated symbol interfaces. 
#include "mlir/IR/SymbolInterfaces.h.inc" -} // end namespace mlir - #endif // MLIR_IR_SYMBOLTABLE_H diff --git a/mlir/include/mlir/Interfaces/CallInterfaces.h b/mlir/include/mlir/Interfaces/CallInterfaces.h index ddfd5a942e49f..cc8e26eceba3f 100644 --- a/mlir/include/mlir/Interfaces/CallInterfaces.h +++ b/mlir/include/mlir/Interfaces/CallInterfaces.h @@ -23,8 +23,9 @@ namespace mlir { struct CallInterfaceCallable : public PointerUnion { using PointerUnion::PointerUnion; }; +} // end namespace mlir +/// Include the generated interface declarations. #include "mlir/Interfaces/CallInterfaces.h.inc" -} // end namespace mlir #endif // MLIR_INTERFACES_CALLINTERFACES_H diff --git a/mlir/include/mlir/Interfaces/CallInterfaces.td b/mlir/include/mlir/Interfaces/CallInterfaces.td index 18d927571d413..7db6730c5e992 100644 --- a/mlir/include/mlir/Interfaces/CallInterfaces.td +++ b/mlir/include/mlir/Interfaces/CallInterfaces.td @@ -29,6 +29,7 @@ def CallOpInterface : OpInterface<"CallOpInterface"> { indirect calls to other operations `call_indirect %foo`. An operation that uses this interface, must *not* also provide the `CallableOpInterface`. }]; + let cppNamespace = "::mlir"; let methods = [ InterfaceMethod<[{ @@ -70,6 +71,7 @@ def CallableOpInterface : OpInterface<"CallableOpInterface"> { `%foo = dialect.create_function(...)`. These operations may only contain a single region, or subroutine. 
}]; + let cppNamespace = "::mlir"; let methods = [ InterfaceMethod<[{ diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h index e18c46f745a2f..7e609ca13a097 100644 --- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h +++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.h @@ -70,12 +70,6 @@ class RegionSuccessor { ValueRange inputs; }; -//===----------------------------------------------------------------------===// -// ControlFlow Interfaces -//===----------------------------------------------------------------------===// - -#include "mlir/Interfaces/ControlFlowInterfaces.h.inc" - //===----------------------------------------------------------------------===// // ControlFlow Traits //===----------------------------------------------------------------------===// @@ -101,4 +95,11 @@ struct ReturnLike : public TraitBase { } // end namespace mlir +//===----------------------------------------------------------------------===// +// ControlFlow Interfaces +//===----------------------------------------------------------------------===// + +/// Include the generated interface declarations. +#include "mlir/Interfaces/ControlFlowInterfaces.h.inc" + #endif // MLIR_INTERFACES_CONTROLFLOWINTERFACES_H diff --git a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td index 34c7bade6fe12..8b5a0b769ab17 100644 --- a/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td +++ b/mlir/include/mlir/Interfaces/ControlFlowInterfaces.td @@ -25,6 +25,8 @@ def BranchOpInterface : OpInterface<"BranchOpInterface"> { This interface provides information for branching terminator operations, i.e. terminator operations with successors. 
}]; + let cppNamespace = "::mlir"; + let methods = [ InterfaceMethod<[{ Returns a mutable range of operands that correspond to the arguments of @@ -96,6 +98,8 @@ def RegionBranchOpInterface : OpInterface<"RegionBranchOpInterface"> { branching behavior between held regions, i.e. this interface allows for expressing control flow information for region holding operations. }]; + let cppNamespace = "::mlir"; + let methods = [ InterfaceMethod<[{ Returns the operands of this operation used as the entry arguments when diff --git a/mlir/include/mlir/Interfaces/CopyOpInterface.h b/mlir/include/mlir/Interfaces/CopyOpInterface.h index d6dc409c2471c..2f38eb326b53e 100644 --- a/mlir/include/mlir/Interfaces/CopyOpInterface.h +++ b/mlir/include/mlir/Interfaces/CopyOpInterface.h @@ -15,10 +15,7 @@ #include "mlir/IR/OpDefinition.h" -namespace mlir { - +/// Include the generated interface declarations. #include "mlir/Interfaces/CopyOpInterface.h.inc" -} // namespace mlir - #endif // MLIR_INTERFACES_COPYOPINTERFACE_H_ diff --git a/mlir/include/mlir/Interfaces/CopyOpInterface.td b/mlir/include/mlir/Interfaces/CopyOpInterface.td index 658474d70d86d..a503abc185d90 100644 --- a/mlir/include/mlir/Interfaces/CopyOpInterface.td +++ b/mlir/include/mlir/Interfaces/CopyOpInterface.td @@ -19,6 +19,7 @@ def CopyOpInterface : OpInterface<"CopyOpInterface"> { let description = [{ A copy-like operation is one that copies from source value to target value. }]; + let cppNamespace = "::mlir"; let methods = [ InterfaceMethod< diff --git a/mlir/include/mlir/Interfaces/DerivedAttributeOpInterface.h b/mlir/include/mlir/Interfaces/DerivedAttributeOpInterface.h index debafc2438d28..63cd09f5bc42c 100644 --- a/mlir/include/mlir/Interfaces/DerivedAttributeOpInterface.h +++ b/mlir/include/mlir/Interfaces/DerivedAttributeOpInterface.h @@ -15,8 +15,7 @@ #include "mlir/IR/OpDefinition.h" -namespace mlir { +/// Include the generated interface declarations. 
#include "mlir/Interfaces/DerivedAttributeOpInterface.h.inc" -} // namespace mlir #endif // MLIR_INTERFACES_DERIVEDATTRIBUTEOPINTERFACE_H_ diff --git a/mlir/include/mlir/Interfaces/DerivedAttributeOpInterface.td b/mlir/include/mlir/Interfaces/DerivedAttributeOpInterface.td index e6f370752bcf2..92c901840790e 100644 --- a/mlir/include/mlir/Interfaces/DerivedAttributeOpInterface.td +++ b/mlir/include/mlir/Interfaces/DerivedAttributeOpInterface.td @@ -23,6 +23,7 @@ def DerivedAttributeOpInterface : OpInterface<"DerivedAttributeOpInterface"> { from information of the operation. ODS generates convenience accessors for derived attributes and can be used to simplify translations. }]; + let cppNamespace = "::mlir"; let methods = [ StaticInterfaceMethod< diff --git a/mlir/include/mlir/Interfaces/InferTypeOpInterface.h b/mlir/include/mlir/Interfaces/InferTypeOpInterface.h index 67faeb56a51c9..1ae4aa688c84f 100644 --- a/mlir/include/mlir/Interfaces/InferTypeOpInterface.h +++ b/mlir/include/mlir/Interfaces/InferTypeOpInterface.h @@ -95,8 +95,6 @@ LogicalResult inferReturnTensorTypes( LogicalResult verifyInferredResultTypes(Operation *op); } // namespace detail -#include "mlir/Interfaces/InferTypeOpInterface.h.inc" - namespace OpTrait { /// Tensor type inference trait that constructs a tensor from the inferred @@ -119,4 +117,7 @@ class InferTensorType : public TraitBase { } // namespace OpTrait } // namespace mlir +/// Include the generated interface declarations. 
+#include "mlir/Interfaces/InferTypeOpInterface.h.inc" + #endif // MLIR_INTERFACES_INFERTYPEOPINTERFACE_H_ diff --git a/mlir/include/mlir/Interfaces/InferTypeOpInterface.td b/mlir/include/mlir/Interfaces/InferTypeOpInterface.td index 723cf99d38b31..c5132986ec976 100644 --- a/mlir/include/mlir/Interfaces/InferTypeOpInterface.td +++ b/mlir/include/mlir/Interfaces/InferTypeOpInterface.td @@ -25,6 +25,7 @@ def InferTypeOpInterface : OpInterface<"InferTypeOpInterface"> { Interface to infer the return types for an operation that could be used during op construction, verification or type inference. }]; + let cppNamespace = "::mlir"; let methods = [ StaticInterfaceMethod< @@ -73,6 +74,7 @@ def InferShapedTypeOpInterface : OpInterface<"InferShapedTypeOpInterface"> { The components consists of element type, shape and raw attribute. }]; + let cppNamespace = "::mlir"; let methods = [ StaticInterfaceMethod< diff --git a/mlir/include/mlir/Interfaces/LoopLikeInterface.h b/mlir/include/mlir/Interfaces/LoopLikeInterface.h index 5891470c9c6e1..48399ad0d53a8 100644 --- a/mlir/include/mlir/Interfaces/LoopLikeInterface.h +++ b/mlir/include/mlir/Interfaces/LoopLikeInterface.h @@ -15,10 +15,7 @@ #include "mlir/IR/OpDefinition.h" -namespace mlir { - +/// Include the generated interface declarations. #include "mlir/Interfaces/LoopLikeInterface.h.inc" -} // namespace mlir - #endif // MLIR_INTERFACES_LOOPLIKEINTERFACE_H_ diff --git a/mlir/include/mlir/Interfaces/LoopLikeInterface.td b/mlir/include/mlir/Interfaces/LoopLikeInterface.td index cc05030352e74..0e4191b97f97a 100644 --- a/mlir/include/mlir/Interfaces/LoopLikeInterface.td +++ b/mlir/include/mlir/Interfaces/LoopLikeInterface.td @@ -20,6 +20,7 @@ def LoopLikeOpInterface : OpInterface<"LoopLikeOpInterface"> { Encodes properties of a loop. Operations that implement this interface will be considered by loop-invariant code motion. 
}]; + let cppNamespace = "::mlir"; let methods = [ InterfaceMethod<[{ diff --git a/mlir/include/mlir/Interfaces/SideEffectInterfaces.h b/mlir/include/mlir/Interfaces/SideEffectInterfaces.h index 76932e2ef5293..181d218838ffb 100644 --- a/mlir/include/mlir/Interfaces/SideEffectInterfaces.h +++ b/mlir/include/mlir/Interfaces/SideEffectInterfaces.h @@ -215,13 +215,6 @@ struct Read : public Effect::Base {}; struct Write : public Effect::Base {}; } // namespace MemoryEffects -//===----------------------------------------------------------------------===// -// SideEffect Interfaces -//===----------------------------------------------------------------------===// - -/// Include the definitions of the side effect interfaces. -#include "mlir/Interfaces/SideEffectInterfaces.h.inc" - //===----------------------------------------------------------------------===// // SideEffect Utilities //===----------------------------------------------------------------------===// @@ -237,4 +230,11 @@ bool wouldOpBeTriviallyDead(Operation *op); } // end namespace mlir +//===----------------------------------------------------------------------===// +// SideEffect Interfaces +//===----------------------------------------------------------------------===// + +/// Include the definitions of the side effect interfaces. +#include "mlir/Interfaces/SideEffectInterfaces.h.inc" + #endif // MLIR_INTERFACES_SIDEEFFECTS_H diff --git a/mlir/include/mlir/Interfaces/SideEffectInterfaces.td b/mlir/include/mlir/Interfaces/SideEffectInterfaces.td index 26f2a9a7e4555..2a4da16deec2d 100644 --- a/mlir/include/mlir/Interfaces/SideEffectInterfaces.td +++ b/mlir/include/mlir/Interfaces/SideEffectInterfaces.td @@ -142,6 +142,9 @@ class SideEffect { let description = [{ Encodes properties of an operation on vectors that can be unrolled. 
}]; + let cppNamespace = "::mlir"; let methods = [ InterfaceMethod<[{ diff --git a/mlir/include/mlir/Interfaces/ViewLikeInterface.h b/mlir/include/mlir/Interfaces/ViewLikeInterface.h index fe7dd803ccfba..8d319bbeee18f 100644 --- a/mlir/include/mlir/Interfaces/ViewLikeInterface.h +++ b/mlir/include/mlir/Interfaces/ViewLikeInterface.h @@ -15,10 +15,7 @@ #include "mlir/IR/OpDefinition.h" -namespace mlir { - +/// Include the generated interface declarations. #include "mlir/Interfaces/ViewLikeInterface.h.inc" -} // namespace mlir - #endif // MLIR_INTERFACES_VIEWLIKEINTERFACE_H_ diff --git a/mlir/include/mlir/Interfaces/ViewLikeInterface.td b/mlir/include/mlir/Interfaces/ViewLikeInterface.td index 20b03b2315b18..bb00aff488b2a 100644 --- a/mlir/include/mlir/Interfaces/ViewLikeInterface.td +++ b/mlir/include/mlir/Interfaces/ViewLikeInterface.td @@ -21,6 +21,7 @@ def ViewLikeOpInterface : OpInterface<"ViewLikeOpInterface"> { takes in a (view of) buffer (and potentially some other operands) and returns another view of buffer. }]; + let cppNamespace = "::mlir"; let methods = [ InterfaceMethod< diff --git a/mlir/include/mlir/TableGen/Interfaces.h b/mlir/include/mlir/TableGen/Interfaces.h index 4e12ed81fca12..a3462097e4802 100644 --- a/mlir/include/mlir/TableGen/Interfaces.h +++ b/mlir/include/mlir/TableGen/Interfaces.h @@ -76,6 +76,9 @@ class Interface { // Return the name of this interface. StringRef getName() const; + // Return the C++ namespace of this interface. + StringRef getCppNamespace() const; + // Return the methods of this interface. ArrayRef getMethods() const; diff --git a/mlir/include/mlir/TableGen/OpTrait.h b/mlir/include/mlir/TableGen/OpTrait.h index 69c09b600d38d..cf8c506eb9f7e 100644 --- a/mlir/include/mlir/TableGen/OpTrait.h +++ b/mlir/include/mlir/TableGen/OpTrait.h @@ -98,7 +98,7 @@ class InterfaceOpTrait : public OpTrait { OpInterface getOpInterface() const; // Returns the trait corresponding to a C++ trait class. 
- StringRef getTrait() const; + std::string getTrait() const; static bool classof(const OpTrait *t) { return t->getKind() == Kind::Interface; diff --git a/mlir/include/mlir/TableGen/SideEffects.h b/mlir/include/mlir/TableGen/SideEffects.h index 4680105152526..7e464476cea11 100644 --- a/mlir/include/mlir/TableGen/SideEffects.h +++ b/mlir/include/mlir/TableGen/SideEffects.h @@ -30,7 +30,7 @@ class SideEffect : public Operator::VariableDecorator { StringRef getBaseEffectName() const; // Return the name of the Interface that the effect belongs to. - StringRef getInterfaceTrait() const; + std::string getInterfaceTrait() const; // Return the name of the resource class. StringRef getResource() const; diff --git a/mlir/lib/TableGen/Interfaces.cpp b/mlir/lib/TableGen/Interfaces.cpp index 0a6dd5f6a6425..1e6101f83caba 100644 --- a/mlir/lib/TableGen/Interfaces.cpp +++ b/mlir/lib/TableGen/Interfaces.cpp @@ -84,6 +84,11 @@ StringRef Interface::getName() const { return def->getValueAsString("cppClassName"); } +// Return the C++ namespace of this interface. +StringRef Interface::getCppNamespace() const { + return def->getValueAsString("cppNamespace"); +} + // Return the methods of this interface. 
ArrayRef Interface::getMethods() const { return methods; } diff --git a/mlir/lib/TableGen/OpTrait.cpp b/mlir/lib/TableGen/OpTrait.cpp index b32c647b2c958..dbfd0d374b83a 100644 --- a/mlir/lib/TableGen/OpTrait.cpp +++ b/mlir/lib/TableGen/OpTrait.cpp @@ -27,7 +27,7 @@ OpTrait OpTrait::create(const llvm::Init *init) { return OpTrait(Kind::Pred, def); if (def->isSubClassOf("GenInternalOpTrait")) return OpTrait(Kind::Internal, def); - if (def->isSubClassOf("OpInterface")) + if (def->isSubClassOf("OpInterfaceTrait")) return OpTrait(Kind::Interface, def); assert(def->isSubClassOf("NativeOpTrait")); return OpTrait(Kind::Native, def); @@ -56,8 +56,11 @@ OpInterface InterfaceOpTrait::getOpInterface() const { return OpInterface(def); } -llvm::StringRef InterfaceOpTrait::getTrait() const { - return def->getValueAsString("trait"); +std::string InterfaceOpTrait::getTrait() const { + llvm::StringRef trait = def->getValueAsString("trait"); + llvm::StringRef cppNamespace = def->getValueAsString("cppNamespace"); + return cppNamespace.empty() ? 
trait.str() + : (cppNamespace + "::" + trait).str(); } bool InterfaceOpTrait::shouldDeclareMethods() const { diff --git a/mlir/lib/TableGen/Operator.cpp b/mlir/lib/TableGen/Operator.cpp index 7e8b4d8160004..3dd924566a8fd 100644 --- a/mlir/lib/TableGen/Operator.cpp +++ b/mlir/lib/TableGen/Operator.cpp @@ -336,7 +336,7 @@ void tblgen::Operator::populateTypeInferenceInfo( llvm::formatv("{0}::Trait", inferTypeOpInterface).str())) return; if (const auto *opTrait = dyn_cast(&trait)) - if (opTrait->getTrait().startswith(inferTypeOpInterface)) + if (&opTrait->getDef() == inferTrait) return; if (!def.isSubClassOf("AllTypesMatch")) diff --git a/mlir/lib/TableGen/SideEffects.cpp b/mlir/lib/TableGen/SideEffects.cpp index a2116ba3c37ba..286cacfdacf8b 100644 --- a/mlir/lib/TableGen/SideEffects.cpp +++ b/mlir/lib/TableGen/SideEffects.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "mlir/TableGen/SideEffects.h" +#include "llvm/ADT/Twine.h" #include "llvm/TableGen/Record.h" using namespace mlir; @@ -24,8 +25,11 @@ StringRef SideEffect::getBaseEffectName() const { return def->getValueAsString("baseEffectName"); } -StringRef SideEffect::getInterfaceTrait() const { - return def->getValueAsString("interfaceTrait"); +std::string SideEffect::getInterfaceTrait() const { + StringRef trait = def->getValueAsString("interfaceTrait"); + StringRef cppNamespace = def->getValueAsString("cppNamespace"); + return cppNamespace.empty() ? 
trait.str() + : (cppNamespace + "::" + trait).str(); } StringRef SideEffect::getResource() const { diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index dcf40691e17f4..b2b4245989b58 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -887,7 +887,8 @@ static bool canGenerateUnwrappedBuilder(Operator &op) { } static bool canInferType(Operator &op) { - return op.getTrait("InferTypeOpInterface::Trait") && op.getNumRegions() == 0; + return op.getTrait("::mlir::InferTypeOpInterface::Trait") && + op.getNumRegions() == 0; } void OpEmitter::genSeparateArgParamBuilder() { @@ -1917,7 +1918,7 @@ void OpEmitter::genOpAsmInterface() { // TODO: We could also add a flag to allow operations to opt in to this // generation, even if they only have a single operation. int numResults = op.getNumResults(); - if (numResults <= 1 || op.getTrait("OpAsmOpInterface::Trait")) + if (numResults <= 1 || op.getTrait("::mlir::OpAsmOpInterface::Trait")) return; SmallVector resultNames(numResults); @@ -1927,7 +1928,7 @@ void OpEmitter::genOpAsmInterface() { // Don't add the trait if none of the results have a valid name. if (llvm::all_of(resultNames, [](StringRef name) { return name.empty(); })) return; - opClass.addTrait("OpAsmOpInterface::Trait"); + opClass.addTrait("::mlir::OpAsmOpInterface::Trait"); // Generate the right accessor for the number of results. 
auto &method = opClass.newMethod("void", "getAsmResultNames", diff --git a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp index 5a5501d42b7ef..8b27bc6de7c5e 100644 --- a/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp +++ b/mlir/tools/mlir-tblgen/OpInterfacesGen.cpp @@ -150,11 +150,16 @@ struct TypeInterfaceGenerator : public InterfaceGenerator { static void emitInterfaceDef(Interface interface, StringRef valueType, raw_ostream &os) { StringRef interfaceName = interface.getName(); + StringRef cppNamespace = interface.getCppNamespace(); + cppNamespace.consume_front("::"); // Insert the method definitions. bool isOpInterface = isa(interface); for (auto &method : interface.getMethods()) { - emitCPPType(method.getReturnType(), os) << interfaceName << "::"; + emitCPPType(method.getReturnType(), os); + if (!cppNamespace.empty()) + os << cppNamespace << "::"; + os << interfaceName << "::"; emitMethodNameAndArgs(method, os, valueType, /*addThisArg=*/false, /*addConst=*/!isOpInterface); @@ -287,6 +292,11 @@ void InterfaceGenerator::emitTraitDecl(Interface &interface, } void InterfaceGenerator::emitInterfaceDecl(Interface interface) { + llvm::SmallVector namespaces; + llvm::SplitString(interface.getCppNamespace(), namespaces, "::"); + for (StringRef ns : namespaces) + os << "namespace " << ns << " {\n"; + StringRef interfaceName = interface.getName(); auto interfaceTraitsName = (interfaceName + "InterfaceTraits").str(); @@ -321,6 +331,9 @@ void InterfaceGenerator::emitInterfaceDecl(Interface interface) { os << *extraDecls << "\n"; os << "};\n"; + + for (StringRef ns : llvm::reverse(namespaces)) + os << "} // namespace " << ns << "\n"; } bool InterfaceGenerator::emitInterfaceDecls() { From 0a01fc96e24b7c7de2141a2ea07593500ea34732 Mon Sep 17 00:00:00 2001 From: Alexey Lapshin Date: Sun, 12 Jul 2020 22:46:37 +0300 Subject: [PATCH 057/771] Revert "[TRE] allow TRE for non-capturing calls." 
This reverts commit f7907e9d223d8484f9afd457ba614c2db2ae4743. That commit caused error on multi-stage build. --- .../Scalar/TailRecursionElimination.cpp | 113 ++++++++++------ llvm/test/Transforms/TailCallElim/basic.ll | 7 +- .../TailCallElim/tre-multiple-exits.ll | 125 ------------------ .../tre-noncapturing-alloca-calls.ll | 74 ----------- 4 files changed, 72 insertions(+), 247 deletions(-) delete mode 100644 llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll delete mode 100644 llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll diff --git a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp index bfd312a52ea58..5bb1d54d7d127 100644 --- a/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/llvm/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -81,7 +81,6 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" using namespace llvm; #define DEBUG_TYPE "tailcallelim" @@ -93,10 +92,7 @@ STATISTIC(NumAccumAdded, "Number of accumulators introduced"); /// Scan the specified function for alloca instructions. /// If it contains any dynamic allocas, returns false. static bool canTRE(Function &F) { - // TODO: We don't do TRE if dynamic allocas are used. - // Dynamic allocas allocate stack space which should be - // deallocated before new iteration started. That is - // currently not implemented. + // Because of PR962, we don't TRE dynamic allocas. 
return llvm::all_of(instructions(F), [](Instruction &I) { auto *AI = dyn_cast(&I); return !AI || AI->isStaticAlloca(); @@ -189,9 +185,11 @@ struct AllocaDerivedValueTracker { }; } -static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { +static bool markTails(Function &F, bool &AllCallsAreTailCalls, + OptimizationRemarkEmitter *ORE) { if (F.callsFunctionThatReturnsTwice()) return false; + AllCallsAreTailCalls = true; // The local stack holds all alloca instructions and all byval arguments. AllocaDerivedValueTracker Tracker; @@ -274,8 +272,11 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { } } - if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) + if (!IsNoTail && Escaped == UNESCAPED && !Tracker.AllocaUsers.count(CI)) { DeferredTails.push_back(CI); + } else { + AllCallsAreTailCalls = false; + } } for (auto *SuccBB : make_range(succ_begin(BB), succ_end(BB))) { @@ -312,6 +313,8 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { LLVM_DEBUG(dbgs() << "Marked as tail call candidate: " << *CI << "\n"); CI->setTailCall(); Modified = true; + } else { + AllCallsAreTailCalls = false; } } @@ -322,16 +325,7 @@ static bool markTails(Function &F, OptimizationRemarkEmitter *ORE) { /// instruction from after the call to before the call, assuming that all /// instructions between the call and this instruction are movable. /// -static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA, - DenseMap &AllocaForValue) { - if (isa(I)) - return true; - - if (const IntrinsicInst *II = dyn_cast(I)) - if (II->getIntrinsicID() == Intrinsic::lifetime_end && - llvm::findAllocaForValue(II->getArgOperand(1), AllocaForValue)) - return true; - +static bool canMoveAboveCall(Instruction *I, CallInst *CI, AliasAnalysis *AA) { // FIXME: We can move load/store/call/free instructions above the call if the // call does not mod/ref the memory location being processed. 
if (I->mayHaveSideEffects()) // This also handles volatile loads. @@ -398,6 +392,7 @@ class TailRecursionEliminator { // createTailRecurseLoopHeader the first time we find a call we can eliminate. BasicBlock *HeaderBB = nullptr; SmallVector ArgumentPHIs; + bool RemovableCallsMustBeMarkedTail = false; // PHI node to store our return value. PHINode *RetPN = nullptr; @@ -419,15 +414,13 @@ class TailRecursionEliminator { // The instruction doing the accumulating. Instruction *AccumulatorRecursionInstr = nullptr; - // The cache for pairs. - DenseMap AllocaForValue; - TailRecursionEliminator(Function &F, const TargetTransformInfo *TTI, AliasAnalysis *AA, OptimizationRemarkEmitter *ORE, DomTreeUpdater &DTU) : F(F), TTI(TTI), AA(AA), ORE(ORE), DTU(DTU) {} - CallInst *findTRECandidate(Instruction *TI); + CallInst *findTRECandidate(Instruction *TI, + bool CannotTailCallElimCallsMarkedTail); void createTailRecurseLoopHeader(CallInst *CI); @@ -435,9 +428,11 @@ class TailRecursionEliminator { bool eliminateCall(CallInst *CI); - bool foldReturnAndProcessPred(ReturnInst *Ret); + bool foldReturnAndProcessPred(ReturnInst *Ret, + bool CannotTailCallElimCallsMarkedTail); - bool processReturningBlock(ReturnInst *Ret); + bool processReturningBlock(ReturnInst *Ret, + bool CannotTailCallElimCallsMarkedTail); void cleanupAndFinalize(); @@ -448,7 +443,8 @@ class TailRecursionEliminator { }; } // namespace -CallInst *TailRecursionEliminator::findTRECandidate(Instruction *TI) { +CallInst *TailRecursionEliminator::findTRECandidate( + Instruction *TI, bool CannotTailCallElimCallsMarkedTail) { BasicBlock *BB = TI->getParent(); if (&BB->front() == TI) // Make sure there is something before the terminator. 
@@ -468,9 +464,9 @@ CallInst *TailRecursionEliminator::findTRECandidate(Instruction *TI) { --BBI; } - assert((!CI->isTailCall() || !CI->isNoTailCall()) && - "Incompatible call site attributes(Tail,NoTail)"); - if (!CI->isTailCall()) + // If this call is marked as a tail call, and if there are dynamic allocas in + // the function, we cannot perform this optimization. + if (CI->isTailCall() && CannotTailCallElimCallsMarkedTail) return nullptr; // As a special case, detect code like this: @@ -502,13 +498,26 @@ void TailRecursionEliminator::createTailRecurseLoopHeader(CallInst *CI) { BranchInst *BI = BranchInst::Create(HeaderBB, NewEntry); BI->setDebugLoc(CI->getDebugLoc()); - // Move all fixed sized allocas from HeaderBB to NewEntry. - for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), - NEBI = NewEntry->begin(); - OEBI != E;) - if (AllocaInst *AI = dyn_cast(OEBI++)) - if (isa(AI->getArraySize())) - AI->moveBefore(&*NEBI); + // If this function has self recursive calls in the tail position where some + // are marked tail and some are not, only transform one flavor or another. + // We have to choose whether we move allocas in the entry block to the new + // entry block or not, so we can't make a good choice for both. We make this + // decision here based on whether the first call we found to remove is + // marked tail. + // NOTE: We could do slightly better here in the case that the function has + // no entry block allocas. + RemovableCallsMustBeMarkedTail = CI->isTailCall(); + + // If this tail call is marked 'tail' and if there are any allocas in the + // entry block, move them up to the new entry block. + if (RemovableCallsMustBeMarkedTail) + // Move all fixed sized allocas from HeaderBB to NewEntry. 
+ for (BasicBlock::iterator OEBI = HeaderBB->begin(), E = HeaderBB->end(), + NEBI = NewEntry->begin(); + OEBI != E;) + if (AllocaInst *AI = dyn_cast(OEBI++)) + if (isa(AI->getArraySize())) + AI->moveBefore(&*NEBI); // Now that we have created a new block, which jumps to the entry // block, insert a PHI node for each argument of the function. @@ -583,7 +592,7 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { Instruction *AccRecInstr = nullptr; BasicBlock::iterator BBI(CI); for (++BBI; &*BBI != Ret; ++BBI) { - if (canMoveAboveCall(&*BBI, CI, AA, AllocaForValue)) + if (canMoveAboveCall(&*BBI, CI, AA)) continue; // If we can't move the instruction above the call, it might be because it @@ -611,6 +620,9 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { if (!HeaderBB) createTailRecurseLoopHeader(CI); + if (RemovableCallsMustBeMarkedTail && !CI->isTailCall()) + return false; + // Ok, now that we know we have a pseudo-entry block WITH all of the // required PHI nodes, add entries into the PHI node for the actual // parameters passed into the tail-recursive call. 
@@ -660,7 +672,8 @@ bool TailRecursionEliminator::eliminateCall(CallInst *CI) { return true; } -bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) { +bool TailRecursionEliminator::foldReturnAndProcessPred( + ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) { BasicBlock *BB = Ret->getParent(); bool Change = false; @@ -685,7 +698,8 @@ bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) { while (!UncondBranchPreds.empty()) { BranchInst *BI = UncondBranchPreds.pop_back_val(); BasicBlock *Pred = BI->getParent(); - if (CallInst *CI = findTRECandidate(BI)) { + if (CallInst *CI = + findTRECandidate(BI, CannotTailCallElimCallsMarkedTail)) { LLVM_DEBUG(dbgs() << "FOLDING: " << *BB << "INTO UNCOND BRANCH PRED: " << *Pred); FoldReturnIntoUncondBranch(Ret, BB, Pred, &DTU); @@ -706,8 +720,9 @@ bool TailRecursionEliminator::foldReturnAndProcessPred(ReturnInst *Ret) { return Change; } -bool TailRecursionEliminator::processReturningBlock(ReturnInst *Ret) { - CallInst *CI = findTRECandidate(Ret); +bool TailRecursionEliminator::processReturningBlock( + ReturnInst *Ret, bool CannotTailCallElimCallsMarkedTail) { + CallInst *CI = findTRECandidate(Ret, CannotTailCallElimCallsMarkedTail); if (!CI) return false; @@ -795,25 +810,35 @@ bool TailRecursionEliminator::eliminate(Function &F, return false; bool MadeChange = false; - MadeChange |= markTails(F, ORE); + bool AllCallsAreTailCalls = false; + MadeChange |= markTails(F, AllCallsAreTailCalls, ORE); + if (!AllCallsAreTailCalls) + return MadeChange; // If this function is a varargs function, we won't be able to PHI the args // right, so don't even try to convert it... if (F.getFunctionType()->isVarArg()) return MadeChange; - if (!canTRE(F)) - return MadeChange; + // If false, we cannot perform TRE on tail calls marked with the 'tail' + // attribute, because doing so would cause the stack size to increase (real + // TRE would deallocate variable sized allocas, TRE doesn't). 
+ bool CanTRETailMarkedCall = canTRE(F); TailRecursionEliminator TRE(F, TTI, AA, ORE, DTU); // Change any tail recursive calls to loops. + // + // FIXME: The code generator produces really bad code when an 'escaping + // alloca' is changed from being a static alloca to being a dynamic alloca. + // Until this is resolved, disable this transformation if that would ever + // happen. This bug is PR962. for (Function::iterator BBI = F.begin(), E = F.end(); BBI != E; /*in loop*/) { BasicBlock *BB = &*BBI++; // foldReturnAndProcessPred may delete BB. if (ReturnInst *Ret = dyn_cast(BB->getTerminator())) { - bool Change = TRE.processReturningBlock(Ret); + bool Change = TRE.processReturningBlock(Ret, !CanTRETailMarkedCall); if (!Change && BB->getFirstNonPHIOrDbg() == Ret) - Change = TRE.foldReturnAndProcessPred(Ret); + Change = TRE.foldReturnAndProcessPred(Ret, !CanTRETailMarkedCall); MadeChange |= Change; } } diff --git a/llvm/test/Transforms/TailCallElim/basic.ll b/llvm/test/Transforms/TailCallElim/basic.ll index 669210da6314b..6116014a024b1 100644 --- a/llvm/test/Transforms/TailCallElim/basic.ll +++ b/llvm/test/Transforms/TailCallElim/basic.ll @@ -12,16 +12,15 @@ define void @test0() { ret void } -; Make sure that we do not do TRE if pointer to local stack -; escapes through function call. +; PR615. Make sure that we do not move the alloca so that it interferes with the tail call. 
define i32 @test1() { ; CHECK: i32 @test1() ; CHECK-NEXT: alloca %A = alloca i32 ; [#uses=2] store i32 5, i32* %A call void @use(i32* %A) -; CHECK: call i32 @test1 - %X = call i32 @test1() ; [#uses=1] +; CHECK: tail call i32 @test1 + %X = tail call i32 @test1() ; [#uses=1] ret i32 %X } diff --git a/llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll b/llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll deleted file mode 100644 index 8f69087dd879d..0000000000000 --- a/llvm/test/Transforms/TailCallElim/tre-multiple-exits.ll +++ /dev/null @@ -1,125 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -tailcallelim -verify-dom-info -S | FileCheck %s - -; This test checks that TRE would be done for only one recursive call. -; The test_multiple_exits function has three recursive calls. -; First recursive call could not be eliminated because there is -; escaped pointer to local variable. Second recursive call could -; be eliminated. Thrid recursive call could not be eliminated since -; this is not last call. Thus, test checks that TRE would be done -; for only second recursive call. - -; IR for that test was generated from the following C++ source: -; -; void capture_arg (int*); -; void test_multiple_exits (int param); -; if (param >= 0 && param < 10) { -; int temp; -; capture_arg(&temp); -; // TRE could not be done because pointer to local -; // variable "temp" is escaped. -; test_multiple_exits(param + 1); -; } else if (param >=10 && param < 20) { -; // TRE should be done. -; test_multiple_exits(param + 1); -; } else if (param >= 20 && param < 22) { -; // TRE could not be done since recursive -; // call is not last call. 
-; test_multiple_exits(param + 1); -; func(); -; } -; -; return; -; } - -; Function Attrs: noinline optnone uwtable -declare void @_Z11capture_argPi(i32* %param) #0 - -; Function Attrs: noinline optnone uwtable -declare void @_Z4funcv() #0 - -; Function Attrs: noinline nounwind uwtable -define dso_local void @_Z19test_multiple_exitsi(i32 %param) local_unnamed_addr #2 { -; CHECK-LABEL: @_Z19test_multiple_exitsi( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TEMP:%.*]] = alloca i32, align 4 -; CHECK-NEXT: br label [[TAILRECURSE:%.*]] -; CHECK: tailrecurse: -; CHECK-NEXT: [[PARAM_TR:%.*]] = phi i32 [ [[PARAM:%.*]], [[ENTRY:%.*]] ], [ [[ADD6:%.*]], [[IF_THEN5:%.*]] ] -; CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[PARAM_TR]], 10 -; CHECK-NEXT: br i1 [[TMP0]], label [[IF_THEN:%.*]], label [[IF_ELSE:%.*]] -; CHECK: if.then: -; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TEMP]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TMP1]]) #1 -; CHECK-NEXT: call void @_Z11capture_argPi(i32* nonnull [[TEMP]]) -; CHECK-NEXT: [[ADD:%.*]] = add nuw nsw i32 [[PARAM_TR]], 1 -; CHECK-NEXT: call void @_Z19test_multiple_exitsi(i32 [[ADD]]) -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP1]]) #1 -; CHECK-NEXT: br label [[IF_END14:%.*]] -; CHECK: if.else: -; CHECK-NEXT: [[PARAM_OFF:%.*]] = add i32 [[PARAM_TR]], -10 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[PARAM_OFF]], 10 -; CHECK-NEXT: br i1 [[TMP2]], label [[IF_THEN5]], label [[IF_ELSE7:%.*]] -; CHECK: if.then5: -; CHECK-NEXT: [[ADD6]] = add nuw nsw i32 [[PARAM_TR]], 1 -; CHECK-NEXT: br label [[TAILRECURSE]] -; CHECK: if.else7: -; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[PARAM_TR]], -2 -; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], 20 -; CHECK-NEXT: br i1 [[TMP4]], label [[IF_THEN11:%.*]], label [[IF_END14]] -; CHECK: if.then11: -; CHECK-NEXT: [[ADD12:%.*]] = add nsw i32 [[PARAM_TR]], 1 -; CHECK-NEXT: tail call void @_Z19test_multiple_exitsi(i32 [[ADD12]]) -; CHECK-NEXT: tail call void 
@_Z4funcv() -; CHECK-NEXT: ret void -; CHECK: if.end14: -; CHECK-NEXT: ret void -; -entry: - %temp = alloca i32, align 4 - %0 = icmp ult i32 %param, 10 - br i1 %0, label %if.then, label %if.else - -if.then: ; preds = %entry - %1 = bitcast i32* %temp to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #2 - call void @_Z11capture_argPi(i32* nonnull %temp) - %add = add nuw nsw i32 %param, 1 - call void @_Z19test_multiple_exitsi(i32 %add) - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #2 - br label %if.end14 - -if.else: ; preds = %entry - %param.off = add i32 %param, -10 - %2 = icmp ult i32 %param.off, 10 - br i1 %2, label %if.then5, label %if.else7 - -if.then5: ; preds = %if.else - %add6 = add nuw nsw i32 %param, 1 - call void @_Z19test_multiple_exitsi(i32 %add6) - br label %if.end14 - -if.else7: ; preds = %if.else - %3 = and i32 %param, -2 - %4 = icmp eq i32 %3, 20 - br i1 %4, label %if.then11, label %if.end14 - -if.then11: ; preds = %if.else7 - %add12 = add nsw i32 %param, 1 - call void @_Z19test_multiple_exitsi(i32 %add12) - call void @_Z4funcv() - br label %if.end14 - -if.end14: ; preds = %if.then5, %if.then11, %if.else7, %if.then - ret void -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 - -attributes #0 = { nofree noinline norecurse nounwind uwtable } -attributes #1 = { nounwind uwtable } -attributes #2 = { argmemonly nounwind willreturn } diff --git a/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll b/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll deleted file mode 100644 index 2168437fc5706..0000000000000 --- a/llvm/test/Transforms/TailCallElim/tre-noncapturing-alloca-calls.ll +++ /dev/null @@ -1,74 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s 
-tailcallelim -verify-dom-info -S | FileCheck %s - -; IR for that test was generated from the following C++ source: -; -;int count; -;__attribute__((noinline)) void globalIncrement(const int* param) { count += *param; } -; -;void test(int recurseCount) -;{ -; if (recurseCount == 0) return; -; int temp = 10; -; globalIncrement(&temp); -; test(recurseCount - 1); -;} -; - -@count = dso_local local_unnamed_addr global i32 0, align 4 - -; Function Attrs: nofree noinline norecurse nounwind uwtable -declare void @_Z15globalIncrementPKi(i32* nocapture readonly %param) #0 - -; Test that TRE could be done for recursive tail routine containing -; call to function receiving a pointer to local stack. - -; Function Attrs: nounwind uwtable -define dso_local void @_Z4testi(i32 %recurseCount) local_unnamed_addr #1 { -; CHECK-LABEL: @_Z4testi( -; CHECK-NEXT: entry: -; CHECK-NEXT: [[TEMP:%.*]] = alloca i32, align 4 -; CHECK-NEXT: br label [[TAILRECURSE:%.*]] -; CHECK: tailrecurse: -; CHECK-NEXT: [[RECURSECOUNT_TR:%.*]] = phi i32 [ [[RECURSECOUNT:%.*]], [[ENTRY:%.*]] ], [ [[SUB:%.*]], [[IF_END:%.*]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[RECURSECOUNT_TR]], 0 -; CHECK-NEXT: br i1 [[CMP]], label [[RETURN:%.*]], label [[IF_END]] -; CHECK: if.end: -; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[TEMP]] to i8* -; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull [[TMP0]]) -; CHECK-NEXT: store i32 10, i32* [[TEMP]], align 4 -; CHECK-NEXT: call void @_Z15globalIncrementPKi(i32* nonnull [[TEMP]]) -; CHECK-NEXT: [[SUB]] = add nsw i32 [[RECURSECOUNT_TR]], -1 -; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull [[TMP0]]) -; CHECK-NEXT: br label [[TAILRECURSE]] -; CHECK: return: -; CHECK-NEXT: ret void -; -entry: - %temp = alloca i32, align 4 - %cmp = icmp eq i32 %recurseCount, 0 - br i1 %cmp, label %return, label %if.end - -if.end: ; preds = %entry - %0 = bitcast i32* %temp to i8* - call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #6 - store i32 10, 
i32* %temp, align 4 - call void @_Z15globalIncrementPKi(i32* nonnull %temp) - %sub = add nsw i32 %recurseCount, -1 - call void @_Z4testi(i32 %sub) - call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #6 - br label %return - -return: ; preds = %entry, %if.end - ret void -} - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #2 - -; Function Attrs: argmemonly nounwind willreturn -declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #2 - -attributes #0 = { nofree noinline norecurse nounwind uwtable } -attributes #1 = { nounwind uwtable } -attributes #2 = { argmemonly nounwind willreturn } From 02cfa7530d9e7cfd8ea940dab4173afb7938b831 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Sun, 12 Jul 2020 20:32:26 +0200 Subject: [PATCH 058/771] Bump the default target CPU for i386-freebsd to i686 Summary: Similar to what we have done downstream, some time ago: https://svnweb.freebsd.org/changeset/base/353936 This followed some discussions on the freebsd-arch mailing lists, and most people agreed that it was a better default, and also it worked around several issues where clang generated libcalls to 64 bit atomic primitives, instead of using cmpxchg8b. 
Reviewers: emaste, brooks, rsmith Reviewed By: emaste Subscribers: arichardson, krytarowski, jfb, cfe-commits Tags: #clang Differential Revision: https://reviews.llvm.org/D83645 --- clang/lib/Driver/ToolChains/Arch/X86.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/clang/lib/Driver/ToolChains/Arch/X86.cpp b/clang/lib/Driver/ToolChains/Arch/X86.cpp index aa95c4189d1e2..2cc44c09917f5 100644 --- a/clang/lib/Driver/ToolChains/Arch/X86.cpp +++ b/clang/lib/Driver/ToolChains/Arch/X86.cpp @@ -94,6 +94,7 @@ const char *x86::getX86TargetCPU(const ArgList &Args, switch (Triple.getOS()) { case llvm::Triple::FreeBSD: + return "i686"; case llvm::Triple::NetBSD: case llvm::Triple::OpenBSD: return "i486"; From c73f425f84ad18e4b610dff7d21a5844fb0da5d7 Mon Sep 17 00:00:00 2001 From: Shinji Okumura Date: Sun, 12 Jul 2020 19:11:49 +0900 Subject: [PATCH 059/771] [Attributor] Add AAValueSimplifyCallSiteArgument::manifest Reviewed By: jdoerfert Differential Revision: https://reviews.llvm.org/D82861 --- .../Transforms/IPO/AttributorAttributes.cpp | 24 +++++++ llvm/test/Transforms/Attributor/range.ll | 65 +++++++++++++++++++ 2 files changed, 89 insertions(+) diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index dbc1541b9950e..7e9fd61eeb41e 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -4670,6 +4670,30 @@ struct AAValueSimplifyCallSiteArgument : AAValueSimplifyFloating { AAValueSimplifyCallSiteArgument(const IRPosition &IRP, Attributor &A) : AAValueSimplifyFloating(IRP, A) {} + /// See AbstractAttribute::manifest(...). + ChangeStatus manifest(Attributor &A) override { + ChangeStatus Changed = ChangeStatus::UNCHANGED; + + if (SimplifiedAssociatedValue.hasValue() && + !SimplifiedAssociatedValue.getValue()) + return Changed; + + Value &V = getAssociatedValue(); + auto *C = SimplifiedAssociatedValue.hasValue() + ? 
dyn_cast(SimplifiedAssociatedValue.getValue()) + : UndefValue::get(V.getType()); + if (C) { + Use &U = cast(&getAnchorValue())->getArgOperandUse(getArgNo()); + // We can replace the AssociatedValue with the constant. + if (&V != C && V.getType() == C->getType()) { + if (A.changeUseAfterManifest(U, *C)) + Changed = ChangeStatus::CHANGED; + } + } + + return Changed | AAValueSimplify::manifest(A); + } + void trackStatistics() const override { STATS_DECLTRACK_CSARG_ATTR(value_simplify) } diff --git a/llvm/test/Transforms/Attributor/range.ll b/llvm/test/Transforms/Attributor/range.ll index 03338b4ce4999..f105bb3fad0e1 100644 --- a/llvm/test/Transforms/Attributor/range.ll +++ b/llvm/test/Transforms/Attributor/range.ll @@ -1063,6 +1063,71 @@ end: } +define i32 @func(i1 %c) { +; CHECK-LABEL: define {{[^@]+}}@func +; CHECK-SAME: (i1 [[C:%.*]]) +; CHECK-NEXT: [[RET:%.*]] = select i1 [[C]], i32 0, i32 1 +; CHECK-NEXT: ret i32 [[RET]] +; + %ret = select i1 %c, i32 0, i32 1 + ret i32 %ret +} + +define i32 @simplify_callsite_argument(i1 %d) { +; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@simplify_callsite_argument +; IS__TUNIT_OPM-SAME: (i1 [[D:%.*]]) +; IS__TUNIT_OPM-NEXT: [[C:%.*]] = select i1 [[D]], i1 true, i1 false +; IS__TUNIT_OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__TUNIT_OPM: t: +; IS__TUNIT_OPM-NEXT: [[RET1:%.*]] = call i32 @func(i1 [[C]]) #2, !range !3 +; IS__TUNIT_OPM-NEXT: ret i32 [[RET1]] +; IS__TUNIT_OPM: f: +; IS__TUNIT_OPM-NEXT: [[RET2:%.*]] = call i32 @func(i1 false) #2, !range !3 +; IS__TUNIT_OPM-NEXT: ret i32 [[RET2]] +; +; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@simplify_callsite_argument +; IS__TUNIT_NPM-SAME: (i1 [[D:%.*]]) +; IS__TUNIT_NPM-NEXT: [[C:%.*]] = select i1 [[D]], i1 true, i1 false +; IS__TUNIT_NPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__TUNIT_NPM: t: +; IS__TUNIT_NPM-NEXT: [[RET1:%.*]] = call i32 @func(i1 true) #1, !range !4 +; IS__TUNIT_NPM-NEXT: ret i32 [[RET1]] +; IS__TUNIT_NPM: f: +; IS__TUNIT_NPM-NEXT: 
[[RET2:%.*]] = call i32 @func(i1 false) #1, !range !4 +; IS__TUNIT_NPM-NEXT: ret i32 [[RET2]] +; +; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@simplify_callsite_argument +; IS__CGSCC_OPM-SAME: (i1 [[D:%.*]]) +; IS__CGSCC_OPM-NEXT: [[C:%.*]] = select i1 [[D]], i1 true, i1 false +; IS__CGSCC_OPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__CGSCC_OPM: t: +; IS__CGSCC_OPM-NEXT: [[RET1:%.*]] = call i32 @func(i1 [[C]]) +; IS__CGSCC_OPM-NEXT: ret i32 [[RET1]] +; IS__CGSCC_OPM: f: +; IS__CGSCC_OPM-NEXT: [[RET2:%.*]] = call i32 @func(i1 false) +; IS__CGSCC_OPM-NEXT: ret i32 [[RET2]] +; +; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@simplify_callsite_argument +; IS__CGSCC_NPM-SAME: (i1 [[D:%.*]]) +; IS__CGSCC_NPM-NEXT: [[C:%.*]] = select i1 [[D]], i1 true, i1 false +; IS__CGSCC_NPM-NEXT: br i1 [[C]], label [[T:%.*]], label [[F:%.*]] +; IS__CGSCC_NPM: t: +; IS__CGSCC_NPM-NEXT: [[RET1:%.*]] = call i32 @func(i1 true) +; IS__CGSCC_NPM-NEXT: ret i32 [[RET1]] +; IS__CGSCC_NPM: f: +; IS__CGSCC_NPM-NEXT: [[RET2:%.*]] = call i32 @func(i1 false) +; IS__CGSCC_NPM-NEXT: ret i32 [[RET2]] +; + %c = select i1 %d, i1 true, i1 false + br i1 %c, label %t, label %f +t: + %ret1 = call i32 @func(i1 %c) + ret i32 %ret1 +f: + %ret2 = call i32 @func(i1 false) + ret i32 %ret2 +} + !0 = !{i32 0, i32 10} !1 = !{i32 10, i32 100} From 111167895d47558989f9f3a593a82527b016c7e7 Mon Sep 17 00:00:00 2001 From: Logan Smith Date: Sun, 12 Jul 2020 15:38:37 -0700 Subject: [PATCH 060/771] [clang] Add -Wsuggest-override This patch adds `-Wsuggest-override`, which allows for more aggressive enforcement of modern C++ best practices, as well as better compatibility with gcc, which has had its own `-Wsuggest-override` since version 5.1. Clang already has `-Winconsistent-missing-override`, which only warns in the case where there is at least one function already marked `override` in a class. 
This warning strengthens that warning by suggesting the `override` keyword regardless of whether it is already present anywhere. The text between suggest-override and inconsistent-missing-override is now shared, using `TextSubstitution` for the entire diagnostic text. Reviewed By: dblaikie Differential Revision: https://reviews.llvm.org/D82728 --- clang/include/clang/Basic/DiagnosticGroups.td | 7 +++- .../clang/Basic/DiagnosticSemaKinds.td | 22 ++++++++--- clang/include/clang/Sema/Sema.h | 2 +- clang/lib/Sema/SemaDeclCXX.cpp | 33 +++++++++------- .../SemaCXX/warn-suggest-destructor-override | 27 +++++++++++++ clang/test/SemaCXX/warn-suggest-override | 38 +++++++++++++++++++ 6 files changed, 107 insertions(+), 22 deletions(-) create mode 100644 clang/test/SemaCXX/warn-suggest-destructor-override create mode 100644 clang/test/SemaCXX/warn-suggest-override diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 6a50ceef41913..1e829be4028e4 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -280,9 +280,12 @@ def CXX98CompatPedantic : DiagGroup<"c++98-compat-pedantic", def CXX11Narrowing : DiagGroup<"c++11-narrowing">; -def CXX11WarnOverrideDestructor : +def CXX11WarnInconsistentOverrideDestructor : DiagGroup<"inconsistent-missing-destructor-override">; -def CXX11WarnOverrideMethod : DiagGroup<"inconsistent-missing-override">; +def CXX11WarnInconsistentOverrideMethod : + DiagGroup<"inconsistent-missing-override">; +def CXX11WarnSuggestOverrideDestructor : DiagGroup<"suggest-destructor-override">; +def CXX11WarnSuggestOverride : DiagGroup<"suggest-override">; // Original name of this warning in Clang def : DiagGroup<"c++0x-narrowing", [CXX11Narrowing]>; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index 24e942037ecfb..71517edd6659b 100644 --- 
a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -2367,12 +2367,22 @@ def override_keyword_hides_virtual_member_function : Error< "%select{function|functions}1">; def err_function_marked_override_not_overriding : Error< "%0 marked 'override' but does not override any member functions">; -def warn_destructor_marked_not_override_overriding : Warning < - "%0 overrides a destructor but is not marked 'override'">, - InGroup, DefaultIgnore; -def warn_function_marked_not_override_overriding : Warning < - "%0 overrides a member function but is not marked 'override'">, - InGroup; +def warn_destructor_marked_not_override_overriding : TextSubstitution < + "%0 overrides a destructor but is not marked 'override'">; +def warn_function_marked_not_override_overriding : TextSubstitution < + "%0 overrides a member function but is not marked 'override'">; +def warn_inconsistent_destructor_marked_not_override_overriding : Warning < + "%sub{warn_destructor_marked_not_override_overriding}0">, + InGroup, DefaultIgnore; +def warn_inconsistent_function_marked_not_override_overriding : Warning < + "%sub{warn_function_marked_not_override_overriding}0">, + InGroup; +def warn_suggest_destructor_marked_not_override_overriding : Warning < + "%sub{warn_destructor_marked_not_override_overriding}0">, + InGroup, DefaultIgnore; +def warn_suggest_function_marked_not_override_overriding : Warning < + "%sub{warn_function_marked_not_override_overriding}0">, + InGroup, DefaultIgnore; def err_class_marked_final_used_as_base : Error< "base %0 is marked '%select{final|sealed}1'">; def warn_abstract_final_class : Warning< diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index e75ac185eb2cb..6f7ad8076718d 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -6965,7 +6965,7 @@ class Sema final { /// DiagnoseAbsenceOfOverrideControl - Diagnose if 'override' keyword was /// not used in the 
declaration of an overriding method. - void DiagnoseAbsenceOfOverrideControl(NamedDecl *D); + void DiagnoseAbsenceOfOverrideControl(NamedDecl *D, bool Inconsistent); /// CheckForFunctionMarkedFinal - Checks whether a virtual member function /// overrides a virtual member function marked 'final', according to diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 9cad6debc600c..515a2e9690ed1 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -3045,7 +3045,7 @@ void Sema::CheckOverrideControl(NamedDecl *D) { << MD->getDeclName(); } -void Sema::DiagnoseAbsenceOfOverrideControl(NamedDecl *D) { +void Sema::DiagnoseAbsenceOfOverrideControl(NamedDecl *D, bool Inconsistent) { if (D->isInvalidDecl() || D->hasAttr()) return; CXXMethodDecl *MD = dyn_cast(D); @@ -3061,12 +3061,22 @@ void Sema::DiagnoseAbsenceOfOverrideControl(NamedDecl *D) { return; if (MD->size_overridden_methods() > 0) { - unsigned DiagID = isa(MD) - ? diag::warn_destructor_marked_not_override_overriding - : diag::warn_function_marked_not_override_overriding; - Diag(MD->getLocation(), DiagID) << MD->getDeclName(); - const CXXMethodDecl *OMD = *MD->begin_overridden_methods(); - Diag(OMD->getLocation(), diag::note_overridden_virtual_function); + auto EmitDiag = [&](unsigned DiagInconsistent, unsigned DiagSuggest) { + unsigned DiagID = + Inconsistent && !Diags.isIgnored(DiagInconsistent, MD->getLocation()) + ? 
DiagInconsistent + : DiagSuggest; + Diag(MD->getLocation(), DiagID) << MD->getDeclName(); + const CXXMethodDecl *OMD = *MD->begin_overridden_methods(); + Diag(OMD->getLocation(), diag::note_overridden_virtual_function); + }; + if (isa(MD)) + EmitDiag( + diag::warn_inconsistent_destructor_marked_not_override_overriding, + diag::warn_suggest_destructor_marked_not_override_overriding); + else + EmitDiag(diag::warn_inconsistent_function_marked_not_override_overriding, + diag::warn_suggest_function_marked_not_override_overriding); } } @@ -6749,13 +6759,10 @@ void Sema::CheckCompletedCXXClass(Scope *S, CXXRecordDecl *Record) { } } - if (HasMethodWithOverrideControl && - HasOverridingMethodWithoutOverrideControl) { - // At least one method has the 'override' control declared. - // Diagnose all other overridden methods which do not have 'override' - // specified on them. + if (HasOverridingMethodWithoutOverrideControl) { + bool HasInconsistentOverrideControl = HasMethodWithOverrideControl; for (auto *M : Record->methods()) - DiagnoseAbsenceOfOverrideControl(M); + DiagnoseAbsenceOfOverrideControl(M, HasInconsistentOverrideControl); } // Check the defaulted secondary comparisons after any other member functions. 
diff --git a/clang/test/SemaCXX/warn-suggest-destructor-override b/clang/test/SemaCXX/warn-suggest-destructor-override new file mode 100644 index 0000000000000..1cfff748678f2 --- /dev/null +++ b/clang/test/SemaCXX/warn-suggest-destructor-override @@ -0,0 +1,27 @@ +// RUN: %clang_cc1 -fsyntax-only -std=c++11 %s -verify -Wsuggest-destructor-override + +struct A { + ~A(); + virtual void run(); +}; + +struct B : public A { + ~B(); +}; + +struct C { + virtual void run(); + virtual ~C(); // expected-note 2{{overridden virtual function is here}} +}; + +struct D : public C { + void run(); + ~D(); + // expected-warning@-1 {{'~D' overrides a destructor but is not marked 'override'}} +}; + +struct E : public C { + void run(); + virtual ~E(); + // expected-warning@-1 {{'~E' overrides a destructor but is not marked 'override'}} +}; diff --git a/clang/test/SemaCXX/warn-suggest-override b/clang/test/SemaCXX/warn-suggest-override new file mode 100644 index 0000000000000..e06c939ff001f --- /dev/null +++ b/clang/test/SemaCXX/warn-suggest-override @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -fsyntax-only -std=c++11 %s -verify -Wsuggest-override + +struct A { + ~A(); + void run(); +}; + +struct B : public A { + ~B(); + void run(); +}; + +struct C { + virtual void run(); // expected-note 2{{overridden virtual function is here}} + virtual ~C(); +}; + +struct D : public C { + void run(); + // expected-warning@-1 {{'run()' overrides a member function but is not marked 'override'}} + ~D(); +}; + +struct E : public C { + virtual void run(); + // expected-warning@-1 {{'run()' overrides a member function but is not marked 'override'}} + virtual ~E(); +}; + +struct F : public C { + void run() override; + ~F() override; +}; + +struct G : public C { + void run() final; + ~G() final; +}; From b4dbb37f32e554e4d6f118d9ddd87717721ea664 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 12 Jul 2020 16:58:27 -0700 Subject: [PATCH 061/771] [X86] Rename 
X86_CPU_TYPE_COMPAT_ALIAS/X86_CPU_TYPE_COMPAT/X86_CPU_SUBTYPE_COMPAT macros. NFC Remove _COMPAT. Drop the ARCHNAME. Remove the non-COMPAT versions that are no longer needed. We now only use these macros in places where we need compatibility with libgcc/compiler-rt. So we don't need to call out _COMPAT specifically. --- clang/lib/Basic/Targets/X86.cpp | 6 +- clang/lib/CodeGen/CGBuiltin.cpp | 6 +- llvm/include/llvm/Support/X86TargetParser.def | 110 ++++++++---------- llvm/include/llvm/Support/X86TargetParser.h | 4 +- 4 files changed, 58 insertions(+), 68 deletions(-) diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp index e280a72166451..543f232d24591 100644 --- a/clang/lib/Basic/Targets/X86.cpp +++ b/clang/lib/Basic/Targets/X86.cpp @@ -1062,9 +1062,9 @@ void X86TargetInfo::getCPUSpecificCPUDispatchFeatures( bool X86TargetInfo::validateCpuIs(StringRef FeatureStr) const { return llvm::StringSwitch(FeatureStr) #define X86_VENDOR(ENUM, STRING) .Case(STRING, true) -#define X86_CPU_TYPE_COMPAT_ALIAS(ENUM, ALIAS) .Case(ALIAS, true) -#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) .Case(STR, true) -#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) .Case(STR, true) +#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) .Case(ALIAS, true) +#define X86_CPU_TYPE(ENUM, STR) .Case(STR, true) +#define X86_CPU_SUBTYPE(ENUM, STR) .Case(STR, true) #include "llvm/Support/X86TargetParser.def" .Default(false); } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 1d81ede5dc31e..35a93a7889f40 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -11655,11 +11655,11 @@ Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) { std::tie(Index, Value) = StringSwitch>(CPUStr) #define X86_VENDOR(ENUM, STRING) \ .Case(STRING, {0u, static_cast(llvm::X86::ENUM)}) -#define X86_CPU_TYPE_COMPAT_ALIAS(ENUM, ALIAS) \ +#define X86_CPU_TYPE_ALIAS(ENUM, ALIAS) \ .Case(ALIAS, {1u, static_cast(llvm::X86::ENUM)}) -#define 
X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) \ +#define X86_CPU_TYPE(ENUM, STR) \ .Case(STR, {1u, static_cast(llvm::X86::ENUM)}) -#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) \ +#define X86_CPU_SUBTYPE(ENUM, STR) \ .Case(STR, {2u, static_cast(llvm::X86::ENUM)}) #include "llvm/Support/X86TargetParser.def" .Default({0, 0}); diff --git a/llvm/include/llvm/Support/X86TargetParser.def b/llvm/include/llvm/Support/X86TargetParser.def index 9e9f0985d15ea..697f8c70f962d 100644 --- a/llvm/include/llvm/Support/X86TargetParser.def +++ b/llvm/include/llvm/Support/X86TargetParser.def @@ -20,80 +20,70 @@ X86_VENDOR(VENDOR_AMD, "amd") #undef X86_VENDOR // This macro is used for cpu types present in compiler-rt/libgcc. -#ifndef X86_CPU_TYPE_COMPAT -#define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) X86_CPU_TYPE(ARCHNAME, ENUM) -#endif - #ifndef X86_CPU_TYPE -#define X86_CPU_TYPE(ARCHNAME, ENUM) +#define X86_CPU_TYPE(ENUM, STR) #endif -#ifndef X86_CPU_TYPE_COMPAT_ALIAS -#define X86_CPU_TYPE_COMPAT_ALIAS(ENUM, STR) +#ifndef X86_CPU_TYPE_ALIAS +#define X86_CPU_TYPE_ALIAS(ENUM, STR) #endif -// The first part of this list must match what is implemented in libgcc and -// compilert-rt. Clang uses this to know how to implement __builtin_cpu_is. 
-X86_CPU_TYPE_COMPAT("bonnell", INTEL_BONNELL, "bonnell") -X86_CPU_TYPE_COMPAT("core2", INTEL_CORE2, "core2") -X86_CPU_TYPE_COMPAT("nehalem", INTEL_COREI7, "corei7") -X86_CPU_TYPE_COMPAT("amdfam10", AMDFAM10H, "amdfam10h") -X86_CPU_TYPE_COMPAT("bdver1", AMDFAM15H, "amdfam15h") -X86_CPU_TYPE_COMPAT("silvermont", INTEL_SILVERMONT, "silvermont") -X86_CPU_TYPE_COMPAT("knl", INTEL_KNL, "knl") -X86_CPU_TYPE_COMPAT("btver1", AMD_BTVER1, "btver1") -X86_CPU_TYPE_COMPAT("btver2", AMD_BTVER2, "btver2") -X86_CPU_TYPE_COMPAT("znver1", AMDFAM17H, "amdfam17h") -X86_CPU_TYPE_COMPAT("knm", INTEL_KNM, "knm") -X86_CPU_TYPE_COMPAT("goldmont", INTEL_GOLDMONT, "goldmont") -X86_CPU_TYPE_COMPAT("goldmont-plus", INTEL_GOLDMONT_PLUS, "goldmont-plus") -X86_CPU_TYPE_COMPAT("tremont", INTEL_TREMONT, "tremont") +// This list must match what is implemented in libgcc and compilert-rt. Clang +// uses this to know how to implement __builtin_cpu_is. +X86_CPU_TYPE(INTEL_BONNELL, "bonnell") +X86_CPU_TYPE(INTEL_CORE2, "core2") +X86_CPU_TYPE(INTEL_COREI7, "corei7") +X86_CPU_TYPE(AMDFAM10H, "amdfam10h") +X86_CPU_TYPE(AMDFAM15H, "amdfam15h") +X86_CPU_TYPE(INTEL_SILVERMONT, "silvermont") +X86_CPU_TYPE(INTEL_KNL, "knl") +X86_CPU_TYPE(AMD_BTVER1, "btver1") +X86_CPU_TYPE(AMD_BTVER2, "btver2") +X86_CPU_TYPE(AMDFAM17H, "amdfam17h") +X86_CPU_TYPE(INTEL_KNM, "knm") +X86_CPU_TYPE(INTEL_GOLDMONT, "goldmont") +X86_CPU_TYPE(INTEL_GOLDMONT_PLUS, "goldmont-plus") +X86_CPU_TYPE(INTEL_TREMONT, "tremont") // Alternate names supported by __builtin_cpu_is and target multiversioning. 
-X86_CPU_TYPE_COMPAT_ALIAS(INTEL_BONNELL, "atom") -X86_CPU_TYPE_COMPAT_ALIAS(AMDFAM10H, "amdfam10") -X86_CPU_TYPE_COMPAT_ALIAS(AMDFAM15H, "amdfam15") -X86_CPU_TYPE_COMPAT_ALIAS(INTEL_SILVERMONT, "slm") +X86_CPU_TYPE_ALIAS(INTEL_BONNELL, "atom") +X86_CPU_TYPE_ALIAS(AMDFAM10H, "amdfam10") +X86_CPU_TYPE_ALIAS(AMDFAM15H, "amdfam15") +X86_CPU_TYPE_ALIAS(INTEL_SILVERMONT, "slm") -#undef X86_CPU_TYPE_COMPAT_ALIAS -#undef X86_CPU_TYPE_COMPAT +#undef X86_CPU_TYPE_ALIAS #undef X86_CPU_TYPE // This macro is used for cpu subtypes present in compiler-rt/libgcc. -#ifndef X86_CPU_SUBTYPE_COMPAT -#define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) X86_CPU_SUBTYPE(ARCHNAME, ENUM) -#endif - #ifndef X86_CPU_SUBTYPE -#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) +#define X86_CPU_SUBTYPE(ENUM, STR) #endif -// The first part of this list must match what is implemented in libgcc and -// compilert-rt. Clang uses this to know how to implement __builtin_cpu_is. -X86_CPU_SUBTYPE_COMPAT("nehalem", INTEL_COREI7_NEHALEM, "nehalem") -X86_CPU_SUBTYPE_COMPAT("westmere", INTEL_COREI7_WESTMERE, "westmere") -X86_CPU_SUBTYPE_COMPAT("sandybridge", INTEL_COREI7_SANDYBRIDGE, "sandybridge") -X86_CPU_SUBTYPE_COMPAT("amdfam10", AMDFAM10H_BARCELONA, "barcelona") -X86_CPU_SUBTYPE_COMPAT("amdfam10", AMDFAM10H_SHANGHAI, "shanghai") -X86_CPU_SUBTYPE_COMPAT("amdfam10", AMDFAM10H_ISTANBUL, "istanbul") -X86_CPU_SUBTYPE_COMPAT("bdver1", AMDFAM15H_BDVER1, "bdver1") -X86_CPU_SUBTYPE_COMPAT("bdver2", AMDFAM15H_BDVER2, "bdver2") -X86_CPU_SUBTYPE_COMPAT("bdver3", AMDFAM15H_BDVER3, "bdver3") -X86_CPU_SUBTYPE_COMPAT("bdver4", AMDFAM15H_BDVER4, "bdver4") -X86_CPU_SUBTYPE_COMPAT("znver1", AMDFAM17H_ZNVER1, "znver1") -X86_CPU_SUBTYPE_COMPAT("ivybridge", INTEL_COREI7_IVYBRIDGE, "ivybridge") -X86_CPU_SUBTYPE_COMPAT("haswell", INTEL_COREI7_HASWELL, "haswell") -X86_CPU_SUBTYPE_COMPAT("broadwell", INTEL_COREI7_BROADWELL, "broadwell") -X86_CPU_SUBTYPE_COMPAT("skylake", INTEL_COREI7_SKYLAKE, "skylake") 
-X86_CPU_SUBTYPE_COMPAT("skylake-avx512", INTEL_COREI7_SKYLAKE_AVX512, "skylake-avx512") -X86_CPU_SUBTYPE_COMPAT("cannonlake", INTEL_COREI7_CANNONLAKE, "cannonlake") -X86_CPU_SUBTYPE_COMPAT("icelake-client", INTEL_COREI7_ICELAKE_CLIENT, "icelake-client") -X86_CPU_SUBTYPE_COMPAT("icelake-server", INTEL_COREI7_ICELAKE_SERVER, "icelake-server") -X86_CPU_SUBTYPE_COMPAT("znver2", AMDFAM17H_ZNVER2, "znver2") -X86_CPU_SUBTYPE_COMPAT("cascadelake", INTEL_COREI7_CASCADELAKE, "cascadelake") -X86_CPU_SUBTYPE_COMPAT("tigerlake", INTEL_COREI7_TIGERLAKE, "tigerlake") -X86_CPU_SUBTYPE_COMPAT("cooperlake", INTEL_COREI7_COOPERLAKE, "cooperlake") -#undef X86_CPU_SUBTYPE_COMPAT +// This list must match what is implemented in libgcc and compilert-rt. Clang +// uses this to know how to implement __builtin_cpu_is. +X86_CPU_SUBTYPE(INTEL_COREI7_NEHALEM, "nehalem") +X86_CPU_SUBTYPE(INTEL_COREI7_WESTMERE, "westmere") +X86_CPU_SUBTYPE(INTEL_COREI7_SANDYBRIDGE, "sandybridge") +X86_CPU_SUBTYPE(AMDFAM10H_BARCELONA, "barcelona") +X86_CPU_SUBTYPE(AMDFAM10H_SHANGHAI, "shanghai") +X86_CPU_SUBTYPE(AMDFAM10H_ISTANBUL, "istanbul") +X86_CPU_SUBTYPE(AMDFAM15H_BDVER1, "bdver1") +X86_CPU_SUBTYPE(AMDFAM15H_BDVER2, "bdver2") +X86_CPU_SUBTYPE(AMDFAM15H_BDVER3, "bdver3") +X86_CPU_SUBTYPE(AMDFAM15H_BDVER4, "bdver4") +X86_CPU_SUBTYPE(AMDFAM17H_ZNVER1, "znver1") +X86_CPU_SUBTYPE(INTEL_COREI7_IVYBRIDGE, "ivybridge") +X86_CPU_SUBTYPE(INTEL_COREI7_HASWELL, "haswell") +X86_CPU_SUBTYPE(INTEL_COREI7_BROADWELL, "broadwell") +X86_CPU_SUBTYPE(INTEL_COREI7_SKYLAKE, "skylake") +X86_CPU_SUBTYPE(INTEL_COREI7_SKYLAKE_AVX512, "skylake-avx512") +X86_CPU_SUBTYPE(INTEL_COREI7_CANNONLAKE, "cannonlake") +X86_CPU_SUBTYPE(INTEL_COREI7_ICELAKE_CLIENT, "icelake-client") +X86_CPU_SUBTYPE(INTEL_COREI7_ICELAKE_SERVER, "icelake-server") +X86_CPU_SUBTYPE(AMDFAM17H_ZNVER2, "znver2") +X86_CPU_SUBTYPE(INTEL_COREI7_CASCADELAKE, "cascadelake") +X86_CPU_SUBTYPE(INTEL_COREI7_TIGERLAKE, "tigerlake") +X86_CPU_SUBTYPE(INTEL_COREI7_COOPERLAKE, 
"cooperlake") #undef X86_CPU_SUBTYPE diff --git a/llvm/include/llvm/Support/X86TargetParser.h b/llvm/include/llvm/Support/X86TargetParser.h index 4a4fb8ccc4ccf..66c474b5c2750 100644 --- a/llvm/include/llvm/Support/X86TargetParser.h +++ b/llvm/include/llvm/Support/X86TargetParser.h @@ -34,7 +34,7 @@ enum ProcessorVendors : unsigned { // as a proxy for what's in libgcc/compiler-rt. enum ProcessorTypes : unsigned { CPU_TYPE_DUMMY, -#define X86_CPU_TYPE(ARCHNAME, ENUM) \ +#define X86_CPU_TYPE(ENUM, STRING) \ ENUM, #include "llvm/Support/X86TargetParser.def" CPU_TYPE_MAX @@ -44,7 +44,7 @@ enum ProcessorTypes : unsigned { // as a proxy for what's in libgcc/compiler-rt. enum ProcessorSubtypes : unsigned { CPU_SUBTYPE_DUMMY, -#define X86_CPU_SUBTYPE(ARCHNAME, ENUM) \ +#define X86_CPU_SUBTYPE(ENUM, STRING) \ ENUM, #include "llvm/Support/X86TargetParser.def" CPU_SUBTYPE_MAX From 49e5f603d40083dce9c05796e3cde3a185c3beba Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Mon, 23 Mar 2020 15:07:51 -0700 Subject: [PATCH 062/771] Rename/refactor isIntegerConstantExpression to getIntegerConstantExpression There is a version that just tests (also called isIntegerConstantExpression) & whereas this version is specifically used when the value is of interest (a few call sites were actually refactored to calling the test-only version) so let's make the API look more like it. 
Reviewers: aaron.ballman Differential Revision: https://reviews.llvm.org/D76646 --- clang/include/clang/AST/Expr.h | 13 ++-- clang/lib/AST/ASTContext.cpp | 16 ++--- clang/lib/AST/ExprConstant.cpp | 19 +++-- clang/lib/AST/MicrosoftMangle.cpp | 6 +- clang/lib/CodeGen/CGBuiltin.cpp | 114 +++++++++++++----------------- clang/lib/CodeGen/CGExpr.cpp | 16 +++-- clang/lib/Sema/SemaAttr.cpp | 12 ++-- clang/lib/Sema/SemaChecking.cpp | 107 +++++++++++++--------------- clang/lib/Sema/SemaDecl.cpp | 8 +-- clang/lib/Sema/SemaDeclAttr.cpp | 77 ++++++++++---------- clang/lib/Sema/SemaExprCXX.cpp | 14 ++-- clang/lib/Sema/SemaOpenMP.cpp | 66 +++++++++-------- clang/lib/Sema/SemaOverload.cpp | 30 ++++---- clang/lib/Sema/SemaStmtAttr.cpp | 6 +- clang/lib/Sema/SemaType.cpp | 26 +++---- 15 files changed, 255 insertions(+), 275 deletions(-) diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index 66eafaaab715e..a42c7bb5a9f26 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -510,16 +510,15 @@ class Expr : public ValueStmt { /// semantically correspond to a bool. bool isKnownToHaveBooleanValue(bool Semantic = true) const; - /// isIntegerConstantExpr - Return true if this expression is a valid integer - /// constant expression, and, if so, return its value in Result. If not a - /// valid i-c-e, return false and fill in Loc (if specified) with the location - /// of the invalid expression. + /// isIntegerConstantExpr - Return the value if this expression is a valid + /// integer constant expression. If not a valid i-c-e, return None and fill + /// in Loc (if specified) with the location of the invalid expression. /// /// Note: This does not perform the implicit conversions required by C++11 /// [expr.const]p5. 
- bool isIntegerConstantExpr(llvm::APSInt &Result, const ASTContext &Ctx, - SourceLocation *Loc = nullptr, - bool isEvaluated = true) const; + Optional getIntegerConstantExpr(const ASTContext &Ctx, + SourceLocation *Loc = nullptr, + bool isEvaluated = true) const; bool isIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc = nullptr) const; diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 2ba643f12a82f..807028885652d 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -9471,17 +9471,15 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, const ConstantArrayType* CAT) -> std::pair { if (VAT) { - llvm::APSInt TheInt; + Optional TheInt; Expr *E = VAT->getSizeExpr(); - if (E && E->isIntegerConstantExpr(TheInt, *this)) - return std::make_pair(true, TheInt); - else - return std::make_pair(false, TheInt); - } else if (CAT) { - return std::make_pair(true, CAT->getSize()); - } else { - return std::make_pair(false, llvm::APInt()); + if (E && (TheInt = E->getIntegerConstantExpr(*this))) + return std::make_pair(true, *TheInt); + return std::make_pair(false, llvm::APSInt()); } + if (CAT) + return std::make_pair(true, CAT->getSize()); + return std::make_pair(false, llvm::APInt()); }; bool HaveLSize, HaveRSize; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index a4dc0ccad1e0f..011dc890496d0 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14883,16 +14883,22 @@ bool Expr::isIntegerConstantExpr(const ASTContext &Ctx, return true; } -bool Expr::isIntegerConstantExpr(llvm::APSInt &Value, const ASTContext &Ctx, - SourceLocation *Loc, bool isEvaluated) const { +Optional Expr::getIntegerConstantExpr(const ASTContext &Ctx, + SourceLocation *Loc, + bool isEvaluated) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); - if (Ctx.getLangOpts().CPlusPlus11) - return 
EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, &Value, Loc); + APSInt Value; + + if (Ctx.getLangOpts().CPlusPlus11) { + if (EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, &Value, Loc)) + return Value; + return None; + } if (!isIntegerConstantExpr(Ctx, Loc)) - return false; + return None; // The only possible side-effects here are due to UB discovered in the // evaluation (for instance, INT_MAX + 1). In such a case, we are still @@ -14906,8 +14912,7 @@ bool Expr::isIntegerConstantExpr(llvm::APSInt &Value, const ASTContext &Ctx, if (!::EvaluateAsInt(this, ExprResult, Ctx, SE_AllowSideEffects, Info)) llvm_unreachable("ICE cannot be evaluated!"); - Value = ExprResult.Val.getInt(); - return true; + return ExprResult.Val.getInt(); } bool Expr::isCXX98IntegralConstantExpr(const ASTContext &Ctx) const { diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 529f301e46964..09579c28061ae 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1372,9 +1372,9 @@ void MicrosoftCXXNameMangler::mangleIntegerLiteral(const llvm::APSInt &Value, void MicrosoftCXXNameMangler::mangleExpression(const Expr *E) { // See if this is a constant expression. - llvm::APSInt Value; - if (E->isIntegerConstantExpr(Value, Context.getASTContext())) { - mangleIntegerLiteral(Value, E->getType()->isBooleanType()); + if (Optional Value = + E->getIntegerConstantExpr(Context.getASTContext())) { + mangleIntegerLiteral(*Value, E->getType()->isBooleanType()); return; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 35a93a7889f40..3588e33714d2f 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4419,11 +4419,9 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } else { // If this is required to be a constant, constant fold it so that we // know that the generated intrinsic gets a ConstantInt. 
- llvm::APSInt Result; - bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); - assert(IsConst && "Constant arg isn't actually constant?"); - (void)IsConst; - ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); + ArgValue = llvm::ConstantInt::get( + getLLVMContext(), + *E->getArg(i)->getIntegerConstantExpr(getContext())); } // If the intrinsic arg type is different from the builtin arg type @@ -5596,13 +5594,14 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( SmallVectorImpl &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch) { // Get the last argument, which specifies the vector type. - llvm::APSInt NeonTypeConst; const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) + Optional NeonTypeConst = + Arg->getIntegerConstantExpr(getContext()); + if (!NeonTypeConst) return nullptr; // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(NeonTypeConst.getZExtValue()); + NeonTypeFlags Type(NeonTypeConst->getZExtValue()); bool Usgn = Type.isUnsigned(); bool Quad = Type.isQuad(); const bool HasLegalHalfType = getTarget().hasLegalHalfType(); @@ -6885,10 +6884,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } else { // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. - llvm::APSInt Result; - bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); - assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; - Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); + Ops.push_back(llvm::ConstantInt::get( + getLLVMContext(), + *E->getArg(i)->getIntegerConstantExpr(getContext()))); } } @@ -7099,9 +7097,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Get the last argument, which specifies the vector type. 
assert(HasExtraArg); - llvm::APSInt Result; const Expr *Arg = E->getArg(E->getNumArgs()-1); - if (!Arg->isIntegerConstantExpr(Result, getContext())) + Optional Result = Arg->getIntegerConstantExpr(getContext()); + if (!Result) return nullptr; if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || @@ -7114,7 +7112,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ty = DoubleTy; // Determine whether this is an unsigned conversion or not. - bool usgn = Result.getZExtValue() == 1; + bool usgn = Result->getZExtValue() == 1; unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; // Call the appropriate intrinsic. @@ -7123,7 +7121,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(Result.getZExtValue()); + NeonTypeFlags Type = Result->getZExtValue(); bool usgn = Type.isUnsigned(); bool rightShift = false; @@ -7267,11 +7265,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, template static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { - llvm::APSInt IntVal; - bool IsConst = E->isIntegerConstantExpr(IntVal, Context); - assert(IsConst && "Sema should have checked this was a constant"); - (void)IsConst; - return IntVal.getExtValue(); + return E->getIntegerConstantExpr(Context)->getExtValue(); } static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, @@ -7544,13 +7538,13 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID assert(E->getNumArgs() >= 3); // Get the last argument, which specifies the vector type. - llvm::APSInt Result; const Expr *Arg = E->getArg(E->getNumArgs() - 1); - if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) + Optional Result = Arg->getIntegerConstantExpr(CGF.getContext()); + if (!Result) return nullptr; // Determine the type of this overloaded NEON intrinsic. 
- NeonTypeFlags Type(Result.getZExtValue()); + NeonTypeFlags Type = Result->getZExtValue(); llvm::VectorType *Ty = GetNeonType(&CGF, Type); if (!Ty) return nullptr; @@ -8936,11 +8930,9 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } else { // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. - llvm::APSInt Result; - bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); - assert(IsConst && "Constant arg isn't actually constant?"); - (void)IsConst; - Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); + Ops.push_back(llvm::ConstantInt::get( + getLLVMContext(), + *E->getArg(i)->getIntegerConstantExpr(getContext()))); } } @@ -8955,12 +8947,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Result; } - llvm::APSInt Result; const Expr *Arg = E->getArg(E->getNumArgs()-1); NeonTypeFlags Type(0); - if (Arg->isIntegerConstantExpr(Result, getContext())) + if (Optional Result = Arg->getIntegerConstantExpr(getContext())) // Determine the type of this overloaded NEON intrinsic. - Type = NeonTypeFlags(Result.getZExtValue()); + Type = NeonTypeFlags(Result->getZExtValue()); bool usgn = Type.isUnsigned(); bool quad = Type.isQuad(); @@ -11791,10 +11782,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. 
- llvm::APSInt Result; - bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); - assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; - Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); + Ops.push_back(llvm::ConstantInt::get( + getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext()))); } // These exist so that the builtin that takes an immediate can be bounds @@ -15073,11 +15062,8 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); // Constant-fold the M4 and M5 mask arguments. - llvm::APSInt M4, M5; - bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext()); - bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); - assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); - (void)IsConstM4; (void)IsConstM5; + llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext()); + llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext()); // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some combinations of M4 and M5. Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -15132,10 +15118,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); // Constant-fold the M4 mask argument. - llvm::APSInt M4; - bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); - assert(IsConstM4 && "Constant arg isn't actually constant?"); - (void)IsConstM4; + llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext()); // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some values of M4. 
Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -15169,10 +15152,7 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); // Constant-fold the M4 mask argument. - llvm::APSInt M4; - bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); - assert(IsConstM4 && "Constant arg isn't actually constant?"); - (void)IsConstM4; + llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext()); // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some values of M4. Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -15839,10 +15819,11 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Value *Src = EmitScalarExpr(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); - llvm::APSInt isColMajorArg; - if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) + Optional isColMajorArg = + E->getArg(3)->getIntegerConstantExpr(getContext()); + if (!isColMajorArg) return nullptr; - bool isColMajor = isColMajorArg.getSExtValue(); + bool isColMajor = isColMajorArg->getSExtValue(); NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); unsigned IID = isColMajor ? 
II.IID_col : II.IID_row; if (IID == 0) @@ -15883,10 +15864,11 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Value *Dst = EmitScalarExpr(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); - llvm::APSInt isColMajorArg; - if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) + Optional isColMajorArg = + E->getArg(3)->getIntegerConstantExpr(getContext()); + if (!isColMajorArg) return nullptr; - bool isColMajor = isColMajorArg.getSExtValue(); + bool isColMajor = isColMajorArg->getSExtValue(); NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); unsigned IID = isColMajor ? II.IID_col : II.IID_row; if (IID == 0) @@ -15933,16 +15915,20 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Address SrcA = EmitPointerWithAlignment(E->getArg(1)); Address SrcB = EmitPointerWithAlignment(E->getArg(2)); Address SrcC = EmitPointerWithAlignment(E->getArg(3)); - llvm::APSInt LayoutArg; - if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext())) + Optional LayoutArg = + E->getArg(4)->getIntegerConstantExpr(getContext()); + if (!LayoutArg) return nullptr; - int Layout = LayoutArg.getSExtValue(); + int Layout = LayoutArg->getSExtValue(); if (Layout < 0 || Layout > 3) return nullptr; llvm::APSInt SatfArg; if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1) SatfArg = 0; // .b1 does not have satf argument. 
- else if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) + else if (Optional OptSatfArg = + E->getArg(5)->getIntegerConstantExpr(getContext())) + SatfArg = *OptSatfArg; + else return nullptr; bool Satf = SatfArg.getSExtValue(); NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID); @@ -16271,9 +16257,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_extract_lane_i64x2: case WebAssembly::BI__builtin_wasm_extract_lane_f32x4: case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: { - llvm::APSInt LaneConst; - if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext())) - llvm_unreachable("Constant arg isn't actually constant?"); + llvm::APSInt LaneConst = + *E->getArg(1)->getIntegerConstantExpr(getContext()); Value *Vec = EmitScalarExpr(E->getArg(0)); Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst); Value *Extract = Builder.CreateExtractElement(Vec, Lane); @@ -16299,9 +16284,8 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_replace_lane_i64x2: case WebAssembly::BI__builtin_wasm_replace_lane_f32x4: case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: { - llvm::APSInt LaneConst; - if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext())) - llvm_unreachable("Constant arg isn't actually constant?"); + llvm::APSInt LaneConst = + *E->getArg(1)->getIntegerConstantExpr(getContext()); Value *Vec = EmitScalarExpr(E->getArg(0)); Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst); Value *Val = EmitScalarExpr(E->getArg(2)); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 9e8770573d701..ab29e32929ceb 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3868,15 +3868,17 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, llvm::APSInt ConstLength; if (Length) { // Idx = LowerBound + Length - 1; - if 
(Length->isIntegerConstantExpr(ConstLength, C)) { - ConstLength = ConstLength.zextOrTrunc(PointerWidthInBits); + if (Optional CL = Length->getIntegerConstantExpr(C)) { + ConstLength = CL->zextOrTrunc(PointerWidthInBits); Length = nullptr; } auto *LowerBound = E->getLowerBound(); llvm::APSInt ConstLowerBound(PointerWidthInBits, /*isUnsigned=*/false); - if (LowerBound && LowerBound->isIntegerConstantExpr(ConstLowerBound, C)) { - ConstLowerBound = ConstLowerBound.zextOrTrunc(PointerWidthInBits); - LowerBound = nullptr; + if (LowerBound) { + if (Optional LB = LowerBound->getIntegerConstantExpr(C)) { + ConstLowerBound = LB->zextOrTrunc(PointerWidthInBits); + LowerBound = nullptr; + } } if (!Length) --ConstLength; @@ -3913,8 +3915,10 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, : BaseTy; if (auto *VAT = C.getAsVariableArrayType(ArrayTy)) { Length = VAT->getSizeExpr(); - if (Length->isIntegerConstantExpr(ConstLength, C)) + if (Optional L = Length->getIntegerConstantExpr(C)) { + ConstLength = *L; Length = nullptr; + } } else { auto *CAT = C.getAsConstantArrayType(ArrayTy); ConstLength = CAT->getSize(); diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index b354e810974c4..f9785e4bea5e2 100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -300,20 +300,18 @@ void Sema::ActOnPragmaPack(SourceLocation PragmaLoc, PragmaMsStackAction Action, // If specified then alignment must be a "small" power of two. unsigned AlignmentVal = 0; if (Alignment) { - llvm::APSInt Val; + Optional Val; // pack(0) is like pack(), which just works out since that is what // we use 0 for in PackAttr. 
- if (Alignment->isTypeDependent() || - Alignment->isValueDependent() || - !Alignment->isIntegerConstantExpr(Val, Context) || - !(Val == 0 || Val.isPowerOf2()) || - Val.getZExtValue() > 16) { + if (Alignment->isTypeDependent() || Alignment->isValueDependent() || + !(Val = Alignment->getIntegerConstantExpr(Context)) || + !(*Val == 0 || Val->isPowerOf2()) || Val->getZExtValue() > 16) { Diag(PragmaLoc, diag::warn_pragma_pack_invalid_alignment); return; // Ignore } - AlignmentVal = (unsigned) Val.getZExtValue(); + AlignmentVal = (unsigned)Val->getZExtValue(); } if (Action == Sema::PSK_Show) { // Show the current alignment, making sure to show the right value diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index efaf36a693061..c501c706a97bc 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2284,10 +2284,7 @@ bool Sema::CheckARMCoprocessorImmediate(const TargetInfo &TI, if (CoprocArg->isTypeDependent() || CoprocArg->isValueDependent()) return false; - llvm::APSInt CoprocNoAP; - bool IsICE = CoprocArg->isIntegerConstantExpr(CoprocNoAP, Context); - (void)IsICE; - assert(IsICE && "Coprocossor immediate is not a constant expression"); + llvm::APSInt CoprocNoAP = *CoprocArg->getIntegerConstantExpr(Context); int64_t CoprocNo = CoprocNoAP.getExtValue(); assert(CoprocNo >= 0 && "Coprocessor immediate must be non-negative"); @@ -2599,8 +2596,7 @@ bool Sema::CheckBPFBuiltinFunctionCall(unsigned BuiltinID, // The second argument needs to be a constant int Arg = TheCall->getArg(1); - llvm::APSInt Value; - if (!Arg->isIntegerConstantExpr(Value, Context)) { + if (!Arg->isIntegerConstantExpr(Context)) { Diag(Arg->getBeginLoc(), diag::err_preserve_field_info_not_const) << 2 << Arg->getSourceRange(); return true; @@ -3198,11 +3194,10 @@ bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (BuiltinID == SystemZ::BI__builtin_tabort) { Expr *Arg = TheCall->getArg(0); - llvm::APSInt 
AbortCode(32); - if (Arg->isIntegerConstantExpr(AbortCode, Context) && - AbortCode.getSExtValue() >= 0 && AbortCode.getSExtValue() < 256) - return Diag(Arg->getBeginLoc(), diag::err_systemz_invalid_tabort_code) - << Arg->getSourceRange(); + if (Optional AbortCode = Arg->getIntegerConstantExpr(Context)) + if (AbortCode->getSExtValue() >= 0 && AbortCode->getSExtValue() < 256) + return Diag(Arg->getBeginLoc(), diag::err_systemz_invalid_tabort_code) + << Arg->getSourceRange(); } // For intrinsics which take an immediate value as part of the instruction, @@ -4923,21 +4918,21 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, } if (SubExprs.size() >= 2 && Form != Init) { - llvm::APSInt Result(32); - if (SubExprs[1]->isIntegerConstantExpr(Result, Context) && - !isValidOrderingForOp(Result.getSExtValue(), Op)) - Diag(SubExprs[1]->getBeginLoc(), - diag::warn_atomic_op_has_invalid_memory_order) - << SubExprs[1]->getSourceRange(); + if (Optional Result = + SubExprs[1]->getIntegerConstantExpr(Context)) + if (!isValidOrderingForOp(Result->getSExtValue(), Op)) + Diag(SubExprs[1]->getBeginLoc(), + diag::warn_atomic_op_has_invalid_memory_order) + << SubExprs[1]->getSourceRange(); } if (auto ScopeModel = AtomicExpr::getScopeModel(Op)) { auto *Scope = Args[Args.size() - 1]; - llvm::APSInt Result(32); - if (Scope->isIntegerConstantExpr(Result, Context) && - !ScopeModel->isValid(Result.getZExtValue())) { - Diag(Scope->getBeginLoc(), diag::err_atomic_op_has_invalid_synch_scope) - << Scope->getSourceRange(); + if (Optional Result = + Scope->getIntegerConstantExpr(Context)) { + if (!ScopeModel->isValid(Result->getZExtValue())) + Diag(Scope->getBeginLoc(), diag::err_atomic_op_has_invalid_synch_scope) + << Scope->getSourceRange(); } SubExprs.push_back(Scope); } @@ -5805,8 +5800,7 @@ bool Sema::SemaBuiltinVSX(CallExpr *TheCall) { << TheCall->getSourceRange(); // Check the third argument is a compile time constant - llvm::APSInt Value; - 
if(!TheCall->getArg(2)->isIntegerConstantExpr(Value, Context)) + if (!TheCall->getArg(2)->isIntegerConstantExpr(Context)) return Diag(TheCall->getBeginLoc(), diag::err_vsx_builtin_nonconstant_argument) << 3 /* argument index */ << TheCall->getDirectCallee() @@ -5901,17 +5895,18 @@ ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { TheCall->getArg(i)->isValueDependent()) continue; - llvm::APSInt Result(32); - if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) + Optional Result; + if (!(Result = TheCall->getArg(i)->getIntegerConstantExpr(Context))) return ExprError(Diag(TheCall->getBeginLoc(), diag::err_shufflevector_nonconstant_argument) << TheCall->getArg(i)->getSourceRange()); // Allow -1 which will be translated to undef in the IR. - if (Result.isSigned() && Result.isAllOnesValue()) + if (Result->isSigned() && Result->isAllOnesValue()) continue; - if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) + if (Result->getActiveBits() > 64 || + Result->getZExtValue() >= numElements * 2) return ExprError(Diag(TheCall->getBeginLoc(), diag::err_shufflevector_argument_too_large) << TheCall->getArg(i)->getSourceRange()); @@ -6158,10 +6153,11 @@ bool Sema::SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, if (Arg->isTypeDependent() || Arg->isValueDependent()) return false; - if (!Arg->isIntegerConstantExpr(Result, Context)) + Optional R; + if (!(R = Arg->getIntegerConstantExpr(Context))) return Diag(TheCall->getBeginLoc(), diag::err_constant_integer_arg_type) << FDecl->getDeclName() << Arg->getSourceRange(); - + Result = *R; return false; } @@ -10321,14 +10317,15 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, // If the shift amount is a positive constant, drop the width by // that much. - llvm::APSInt shift; - if (BO->getRHS()->isIntegerConstantExpr(shift, C) && - shift.isNonNegative()) { - unsigned zext = shift.getZExtValue(); - if (zext >= L.Width) - L.Width = (L.NonNegative ? 
0 : 1); - else - L.Width -= zext; + if (Optional shift = + BO->getRHS()->getIntegerConstantExpr(C)) { + if (shift->isNonNegative()) { + unsigned zext = shift->getZExtValue(); + if (zext >= L.Width) + L.Width = (L.NonNegative ? 0 : 1); + else + L.Width -= zext; + } } return L; @@ -10352,9 +10349,9 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, IntRange L = GetExprRange(C, BO->getLHS(), opWidth, InConstantContext); // If the divisor is constant, use that. - llvm::APSInt divisor; - if (BO->getRHS()->isIntegerConstantExpr(divisor, C)) { - unsigned log2 = divisor.logBase2(); // floor(log_2(divisor)) + if (Optional divisor = + BO->getRHS()->getIntegerConstantExpr(C)) { + unsigned log2 = divisor->logBase2(); // floor(log_2(divisor)) if (log2 >= L.Width) L.Width = (L.NonNegative ? 0 : 1); else @@ -10786,23 +10783,20 @@ static void AnalyzeComparison(Sema &S, BinaryOperator *E) { Expr *RHS = E->getRHS(); if (T->isIntegralType(S.Context)) { - llvm::APSInt RHSValue; - llvm::APSInt LHSValue; - - bool IsRHSIntegralLiteral = RHS->isIntegerConstantExpr(RHSValue, S.Context); - bool IsLHSIntegralLiteral = LHS->isIntegerConstantExpr(LHSValue, S.Context); + Optional RHSValue = RHS->getIntegerConstantExpr(S.Context); + Optional LHSValue = LHS->getIntegerConstantExpr(S.Context); // We don't care about expressions whose result is a constant. - if (IsRHSIntegralLiteral && IsLHSIntegralLiteral) + if (RHSValue && LHSValue) return AnalyzeImpConvsInComparison(S, E); // We only care about expressions where just one side is literal - if (IsRHSIntegralLiteral ^ IsLHSIntegralLiteral) { + if ((bool)RHSValue ^ (bool)LHSValue) { // Is the constant on the RHS or LHS? - const bool RhsConstant = IsRHSIntegralLiteral; + const bool RhsConstant = (bool)RHSValue; Expr *Const = RhsConstant ? RHS : LHS; Expr *Other = RhsConstant ? LHS : RHS; - const llvm::APSInt &Value = RhsConstant ? RHSValue : LHSValue; + const llvm::APSInt &Value = RhsConstant ? 
*RHSValue : *LHSValue; // Check whether an integer constant comparison results in a value // of 'true' or 'false'. @@ -11760,8 +11754,8 @@ static void CheckImplicitConversion(Sema &S, Expr *E, QualType T, if (SourcePrecision > 0 && TargetPrecision > 0 && SourcePrecision > TargetPrecision) { - llvm::APSInt SourceInt; - if (E->isIntegerConstantExpr(SourceInt, S.Context)) { + if (Optional SourceInt = + E->getIntegerConstantExpr(S.Context)) { // If the source integer is a constant, convert it to the target // floating point type. Issue a warning if the value changes // during the whole conversion. @@ -11769,11 +11763,11 @@ static void CheckImplicitConversion(Sema &S, Expr *E, QualType T, S.Context.getFloatTypeSemantics(QualType(TargetBT, 0))); llvm::APFloat::opStatus ConversionStatus = TargetFloatValue.convertFromAPInt( - SourceInt, SourceBT->isSignedInteger(), + *SourceInt, SourceBT->isSignedInteger(), llvm::APFloat::rmNearestTiesToEven); if (ConversionStatus != llvm::APFloat::opOK) { - std::string PrettySourceValue = SourceInt.toString(10); + std::string PrettySourceValue = SourceInt->toString(10); SmallString<32> PrettyTargetValue; TargetFloatValue.toString(PrettyTargetValue, TargetPrecision); @@ -14124,9 +14118,10 @@ namespace { return; if (Expr *RHS = BinOp->getRHS()) { RHS = RHS->IgnoreParenCasts(); - llvm::APSInt Value; + Optional Value; VarWillBeReased = - (RHS && RHS->isIntegerConstantExpr(Value, Context) && Value == 0); + (RHS && (Value = RHS->getIntegerConstantExpr(Context)) && + *Value == 0); } } } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index f5e375134c293..dc0f3d68fde38 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -13141,20 +13141,20 @@ void Sema::FinalizeDeclaration(Decl *ThisDecl) { if (!MagicValueExpr) { continue; } - llvm::APSInt MagicValueInt; - if (!MagicValueExpr->isIntegerConstantExpr(MagicValueInt, Context)) { + Optional MagicValueInt; + if (!(MagicValueInt = 
MagicValueExpr->getIntegerConstantExpr(Context))) { Diag(I->getRange().getBegin(), diag::err_type_tag_for_datatype_not_ice) << LangOpts.CPlusPlus << MagicValueExpr->getSourceRange(); continue; } - if (MagicValueInt.getActiveBits() > 64) { + if (MagicValueInt->getActiveBits() > 64) { Diag(I->getRange().getBegin(), diag::err_type_tag_for_datatype_too_large) << LangOpts.CPlusPlus << MagicValueExpr->getSourceRange(); continue; } - uint64_t MagicValue = MagicValueInt.getZExtValue(); + uint64_t MagicValue = MagicValueInt->getZExtValue(); RegisterTypeTagForDatatype(I->getArgumentKind(), MagicValue, I->getMatchingCType(), diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 1a0594512a606..ece93cbd6a9bd 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -240,9 +240,9 @@ template static bool checkUInt32Argument(Sema &S, const AttrInfo &AI, const Expr *Expr, uint32_t &Val, unsigned Idx = UINT_MAX, bool StrictlyUnsigned = false) { - llvm::APSInt I(32); + Optional I = llvm::APSInt(32); if (Expr->isTypeDependent() || Expr->isValueDependent() || - !Expr->isIntegerConstantExpr(I, S.Context)) { + !(I = Expr->getIntegerConstantExpr(S.Context))) { if (Idx != UINT_MAX) S.Diag(getAttrLoc(AI), diag::err_attribute_argument_n_type) << &AI << Idx << AANT_ArgumentIntegerConstant @@ -253,19 +253,19 @@ static bool checkUInt32Argument(Sema &S, const AttrInfo &AI, const Expr *Expr, return false; } - if (!I.isIntN(32)) { + if (!I->isIntN(32)) { S.Diag(Expr->getExprLoc(), diag::err_ice_too_large) - << I.toString(10, false) << 32 << /* Unsigned */ 1; + << I->toString(10, false) << 32 << /* Unsigned */ 1; return false; } - if (StrictlyUnsigned && I.isSigned() && I.isNegative()) { + if (StrictlyUnsigned && I->isSigned() && I->isNegative()) { S.Diag(getAttrLoc(AI), diag::err_attribute_requires_positive_integer) << &AI << /*non-negative*/ 1; return false; } - Val = (uint32_t)I.getZExtValue(); + Val = (uint32_t)I->getZExtValue(); return 
true; } @@ -332,16 +332,16 @@ static bool checkFunctionOrMethodParameterIndex( unsigned NumParams = (HP ? getFunctionOrMethodNumParams(D) : 0) + HasImplicitThisParam; - llvm::APSInt IdxInt; + Optional IdxInt; if (IdxExpr->isTypeDependent() || IdxExpr->isValueDependent() || - !IdxExpr->isIntegerConstantExpr(IdxInt, S.Context)) { + !(IdxInt = IdxExpr->getIntegerConstantExpr(S.Context))) { S.Diag(getAttrLoc(AI), diag::err_attribute_argument_n_type) << &AI << AttrArgNum << AANT_ArgumentIntegerConstant << IdxExpr->getSourceRange(); return false; } - unsigned IdxSource = IdxInt.getLimitedValue(UINT_MAX); + unsigned IdxSource = IdxInt->getLimitedValue(UINT_MAX); if (IdxSource < 1 || (!IV && IdxSource > NumParams)) { S.Diag(getAttrLoc(AI), diag::err_attribute_argument_out_of_bounds) << &AI << AttrArgNum << IdxExpr->getSourceRange(); @@ -1605,8 +1605,8 @@ void Sema::AddAssumeAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E, } if (!E->isValueDependent()) { - llvm::APSInt I(64); - if (!E->isIntegerConstantExpr(I, Context)) { + Optional I = llvm::APSInt(64); + if (!(I = E->getIntegerConstantExpr(Context))) { if (OE) Diag(AttrLoc, diag::err_attribute_argument_n_type) << &TmpAttr << 1 << AANT_ArgumentIntegerConstant @@ -1618,27 +1618,22 @@ void Sema::AddAssumeAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E, return; } - if (!I.isPowerOf2()) { + if (!I->isPowerOf2()) { Diag(AttrLoc, diag::err_alignment_not_power_of_two) << E->getSourceRange(); return; } - if (I > Sema::MaximumAlignment) + if (*I > Sema::MaximumAlignment) Diag(CI.getLoc(), diag::warn_assume_aligned_too_great) << CI.getRange() << Sema::MaximumAlignment; } - if (OE) { - if (!OE->isValueDependent()) { - llvm::APSInt I(64); - if (!OE->isIntegerConstantExpr(I, Context)) { - Diag(AttrLoc, diag::err_attribute_argument_n_type) - << &TmpAttr << 2 << AANT_ArgumentIntegerConstant - << OE->getSourceRange(); - return; - } - } + if (OE && !OE->isValueDependent() && !OE->isIntegerConstantExpr(Context)) { + 
Diag(AttrLoc, diag::err_attribute_argument_n_type) + << &TmpAttr << 2 << AANT_ArgumentIntegerConstant + << OE->getSourceRange(); + return; } D->addAttr(::new (Context) AssumeAlignedAttr(Context, CI, E, OE)); @@ -2729,36 +2724,36 @@ static void handleSentinelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { unsigned sentinel = (unsigned)SentinelAttr::DefaultSentinel; if (AL.getNumArgs() > 0) { Expr *E = AL.getArgAsExpr(0); - llvm::APSInt Idx(32); + Optional Idx = llvm::APSInt(32); if (E->isTypeDependent() || E->isValueDependent() || - !E->isIntegerConstantExpr(Idx, S.Context)) { + !(Idx = E->getIntegerConstantExpr(S.Context))) { S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type) << AL << 1 << AANT_ArgumentIntegerConstant << E->getSourceRange(); return; } - if (Idx.isSigned() && Idx.isNegative()) { + if (Idx->isSigned() && Idx->isNegative()) { S.Diag(AL.getLoc(), diag::err_attribute_sentinel_less_than_zero) << E->getSourceRange(); return; } - sentinel = Idx.getZExtValue(); + sentinel = Idx->getZExtValue(); } unsigned nullPos = (unsigned)SentinelAttr::DefaultNullPos; if (AL.getNumArgs() > 1) { Expr *E = AL.getArgAsExpr(1); - llvm::APSInt Idx(32); + Optional Idx = llvm::APSInt(32); if (E->isTypeDependent() || E->isValueDependent() || - !E->isIntegerConstantExpr(Idx, S.Context)) { + !(Idx = E->getIntegerConstantExpr(S.Context))) { S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type) << AL << 2 << AANT_ArgumentIntegerConstant << E->getSourceRange(); return; } - nullPos = Idx.getZExtValue(); + nullPos = Idx->getZExtValue(); - if ((Idx.isSigned() && Idx.isNegative()) || nullPos > 1) { + if ((Idx->isSigned() && Idx->isNegative()) || nullPos > 1) { // FIXME: This error message could be improved, it would be nice // to say what the bounds actually are. 
S.Diag(AL.getLoc(), diag::err_attribute_sentinel_not_zero_or_one) @@ -4833,19 +4828,19 @@ static Expr *makeLaunchBoundsArgExpr(Sema &S, Expr *E, if (E->isValueDependent()) return E; - llvm::APSInt I(64); - if (!E->isIntegerConstantExpr(I, S.Context)) { + Optional I = llvm::APSInt(64); + if (!(I = E->getIntegerConstantExpr(S.Context))) { S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type) << &AL << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange(); return nullptr; } // Make sure we can fit it in 32 bits. - if (!I.isIntN(32)) { - S.Diag(E->getExprLoc(), diag::err_ice_too_large) << I.toString(10, false) - << 32 << /* Unsigned */ 1; + if (!I->isIntN(32)) { + S.Diag(E->getExprLoc(), diag::err_ice_too_large) + << I->toString(10, false) << 32 << /* Unsigned */ 1; return nullptr; } - if (I < 0) + if (*I < 0) S.Diag(E->getExprLoc(), diag::warn_attribute_argument_n_negative) << &AL << Idx << E->getSourceRange(); @@ -5686,18 +5681,18 @@ static void handleMSP430InterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { } Expr *NumParamsExpr = static_cast(AL.getArgAsExpr(0)); - llvm::APSInt NumParams(32); - if (!NumParamsExpr->isIntegerConstantExpr(NumParams, S.Context)) { + Optional NumParams = llvm::APSInt(32); + if (!(NumParams = NumParamsExpr->getIntegerConstantExpr(S.Context))) { S.Diag(AL.getLoc(), diag::err_attribute_argument_type) << AL << AANT_ArgumentIntegerConstant << NumParamsExpr->getSourceRange(); return; } // The argument should be in range 0..63. 
- unsigned Num = NumParams.getLimitedValue(255); + unsigned Num = NumParams->getLimitedValue(255); if (Num > 63) { S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_bounds) - << AL << (int)NumParams.getSExtValue() + << AL << (int)NumParams->getSExtValue() << NumParamsExpr->getSourceRange(); return; } diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index d885920b6c14e..e3aa817c62249 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -2073,29 +2073,29 @@ Sema::BuildCXXNew(SourceRange Range, bool UseGlobal, // per CWG1464. Otherwise, if it's not a constant, we must have an // unparenthesized array type. if (!(*ArraySize)->isValueDependent()) { - llvm::APSInt Value; // We've already performed any required implicit conversion to integer or // unscoped enumeration type. // FIXME: Per CWG1464, we are required to check the value prior to // converting to size_t. This will never find a negative array size in // C++14 onwards, because Value is always unsigned here! 
- if ((*ArraySize)->isIntegerConstantExpr(Value, Context)) { - if (Value.isSigned() && Value.isNegative()) { + if (Optional Value = + (*ArraySize)->getIntegerConstantExpr(Context)) { + if (Value->isSigned() && Value->isNegative()) { return ExprError(Diag((*ArraySize)->getBeginLoc(), diag::err_typecheck_negative_array_size) << (*ArraySize)->getSourceRange()); } if (!AllocType->isDependentType()) { - unsigned ActiveSizeBits = - ConstantArrayType::getNumAddressingBits(Context, AllocType, Value); + unsigned ActiveSizeBits = ConstantArrayType::getNumAddressingBits( + Context, AllocType, *Value); if (ActiveSizeBits > ConstantArrayType::getMaxSizeBits(Context)) return ExprError( Diag((*ArraySize)->getBeginLoc(), diag::err_array_too_large) - << Value.toString(10) << (*ArraySize)->getSourceRange()); + << Value->toString(10) << (*ArraySize)->getSourceRange()); } - KnownArraySize = Value.getZExtValue(); + KnownArraySize = Value->getZExtValue(); } else if (TypeIdParens.isValid()) { // Can't have dynamic array size when the type-id is in parentheses. Diag((*ArraySize)->getBeginLoc(), diag::ext_new_paren_array_nonconst) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index b27abb54c170f..d1ddf10724172 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5989,8 +5989,7 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, // Deal with non-constant score and user condition expressions. auto HandleNonConstantScoresAndConditions = [this](Expr *&E, bool IsScore) -> bool { - llvm::APSInt Result; - if (!E || E->isIntegerConstantExpr(Result, Context)) + if (!E || E->isIntegerConstantExpr(Context)) return false; if (IsScore) { @@ -6476,14 +6475,14 @@ bool OpenMPIterationSpaceChecker::setStep(Expr *NewStep, bool Subtract) { // loop. If test-expr is of form b relational-op var and relational-op is // > or >= then incr-expr must cause var to increase on each iteration of // the loop. 
- llvm::APSInt Result; - bool IsConstant = NewStep->isIntegerConstantExpr(Result, SemaRef.Context); + Optional Result = + NewStep->getIntegerConstantExpr(SemaRef.Context); bool IsUnsigned = !NewStep->getType()->hasSignedIntegerRepresentation(); bool IsConstNeg = - IsConstant && Result.isSigned() && (Subtract != Result.isNegative()); + Result && Result->isSigned() && (Subtract != Result->isNegative()); bool IsConstPos = - IsConstant && Result.isSigned() && (Subtract == Result.isNegative()); - bool IsConstZero = IsConstant && !Result.getBoolValue(); + Result && Result->isSigned() && (Subtract == Result->isNegative()); + bool IsConstZero = Result && !Result->getBoolValue(); // != with increment is treated as <; != with decrement is treated as > if (!TestIsLessOp.hasValue()) @@ -7914,9 +7913,9 @@ static ExprResult widenIterationCount(unsigned Bits, Expr *E, Sema &SemaRef) { static bool fitsInto(unsigned Bits, bool Signed, const Expr *E, Sema &SemaRef) { if (E == nullptr) return false; - llvm::APSInt Result; - if (E->isIntegerConstantExpr(Result, SemaRef.Context)) - return Signed ? Result.isSignedIntN(Bits) : Result.isIntN(Bits); + if (Optional Result = + E->getIntegerConstantExpr(SemaRef.Context)) + return Signed ? Result->isSignedIntN(Bits) : Result->isIntN(Bits); return false; } @@ -8189,9 +8188,7 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, // Calculate the last iteration number beforehand instead of doing this on // each iteration. Do not do this if the number of iterations may be kfold-ed. 
- llvm::APSInt Result; - bool IsConstant = - LastIteration.get()->isIntegerConstantExpr(Result, SemaRef.Context); + bool IsConstant = LastIteration.get()->isIntegerConstantExpr(SemaRef.Context); ExprResult CalcLastIteration; if (!IsConstant) { ExprResult SaveRef = @@ -12582,15 +12579,16 @@ isNonNegativeIntegerValue(Expr *&ValExpr, Sema &SemaRef, OpenMPClauseKind CKind, ValExpr = Value.get(); // The expression must evaluate to a non-negative integer value. - llvm::APSInt Result; - if (ValExpr->isIntegerConstantExpr(Result, SemaRef.Context) && - Result.isSigned() && - !((!StrictlyPositive && Result.isNonNegative()) || - (StrictlyPositive && Result.isStrictlyPositive()))) { - SemaRef.Diag(Loc, diag::err_omp_negative_expression_in_clause) - << getOpenMPClauseName(CKind) << (StrictlyPositive ? 1 : 0) - << ValExpr->getSourceRange(); - return false; + if (Optional Result = + ValExpr->getIntegerConstantExpr(SemaRef.Context)) { + if (Result->isSigned() && + !((!StrictlyPositive && Result->isNonNegative()) || + (StrictlyPositive && Result->isStrictlyPositive()))) { + SemaRef.Diag(Loc, diag::err_omp_negative_expression_in_clause) + << getOpenMPClauseName(CKind) << (StrictlyPositive ? 1 : 0) + << ValExpr->getSourceRange(); + return false; + } } if (!BuildCapture) return true; @@ -13215,9 +13213,9 @@ OMPClause *Sema::ActOnOpenMPScheduleClause( // OpenMP [2.7.1, Restrictions] // chunk_size must be a loop invariant integer expression with a positive // value. 
- llvm::APSInt Result; - if (ValExpr->isIntegerConstantExpr(Result, Context)) { - if (Result.isSigned() && !Result.isStrictlyPositive()) { + if (Optional Result = + ValExpr->getIntegerConstantExpr(Context)) { + if (Result->isSigned() && !Result->isStrictlyPositive()) { Diag(ChunkSizeLoc, diag::err_omp_negative_expression_in_clause) << "schedule" << 1 << ChunkSize->getSourceRange(); return nullptr; @@ -15688,12 +15686,12 @@ OMPClause *Sema::ActOnOpenMPLinearClause( // Warn about zero linear step (it would be probably better specified as // making corresponding variables 'const'). - llvm::APSInt Result; - bool IsConstant = StepExpr->isIntegerConstantExpr(Result, Context); - if (IsConstant && !Result.isNegative() && !Result.isStrictlyPositive()) - Diag(StepLoc, diag::warn_omp_linear_step_zero) << Vars[0] - << (Vars.size() > 1); - if (!IsConstant && CalcStep.isUsable()) { + if (Optional Result = + StepExpr->getIntegerConstantExpr(Context)) { + if (!Result->isNegative() && !Result->isStrictlyPositive()) + Diag(StepLoc, diag::warn_omp_linear_step_zero) + << Vars[0] << (Vars.size() > 1); + } else if (CalcStep.isUsable()) { // Calculate the step beforehand instead of doing this on each iteration. // (This is not used if the number of iterations may be kfold-ed). CalcStepExpr = CalcStep.get(); @@ -18225,9 +18223,9 @@ OMPClause *Sema::ActOnOpenMPDistScheduleClause( // OpenMP [2.7.1, Restrictions] // chunk_size must be a loop invariant integer expression with a positive // value. 
- llvm::APSInt Result; - if (ValExpr->isIntegerConstantExpr(Result, Context)) { - if (Result.isSigned() && !Result.isStrictlyPositive()) { + if (Optional Result = + ValExpr->getIntegerConstantExpr(Context)) { + if (Result->isSigned() && !Result->isStrictlyPositive()) { Diag(ChunkSizeLoc, diag::err_omp_negative_expression_in_clause) << "dist_schedule" << ChunkSize->getSourceRange(); return nullptr; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 8635397f48067..7c6acf011d574 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -346,7 +346,6 @@ NarrowingKind StandardConversionSequence::getNarrowingKind( ToType->isRealFloatingType()) { if (IgnoreFloatToIntegralConversion) return NK_Not_Narrowing; - llvm::APSInt IntConstantValue; const Expr *Initializer = IgnoreNarrowingConversion(Ctx, Converted); assert(Initializer && "Unknown conversion expression"); @@ -354,19 +353,20 @@ NarrowingKind StandardConversionSequence::getNarrowingKind( if (Initializer->isValueDependent()) return NK_Dependent_Narrowing; - if (Initializer->isIntegerConstantExpr(IntConstantValue, Ctx)) { + if (Optional IntConstantValue = + Initializer->getIntegerConstantExpr(Ctx)) { // Convert the integer to the floating type. llvm::APFloat Result(Ctx.getFloatTypeSemantics(ToType)); - Result.convertFromAPInt(IntConstantValue, IntConstantValue.isSigned(), + Result.convertFromAPInt(*IntConstantValue, IntConstantValue->isSigned(), llvm::APFloat::rmNearestTiesToEven); // And back. - llvm::APSInt ConvertedValue = IntConstantValue; + llvm::APSInt ConvertedValue = *IntConstantValue; bool ignored; Result.convertToInteger(ConvertedValue, llvm::APFloat::rmTowardZero, &ignored); // If the resulting value is different, this was a narrowing conversion. 
- if (IntConstantValue != ConvertedValue) { - ConstantValue = APValue(IntConstantValue); + if (*IntConstantValue != ConvertedValue) { + ConstantValue = APValue(*IntConstantValue); ConstantType = Initializer->getType(); return NK_Constant_Narrowing; } @@ -430,17 +430,18 @@ NarrowingKind StandardConversionSequence::getNarrowingKind( (FromWidth == ToWidth && FromSigned != ToSigned) || (FromSigned && !ToSigned)) { // Not all values of FromType can be represented in ToType. - llvm::APSInt InitializerValue; const Expr *Initializer = IgnoreNarrowingConversion(Ctx, Converted); // If it's value-dependent, we can't tell whether it's narrowing. if (Initializer->isValueDependent()) return NK_Dependent_Narrowing; - if (!Initializer->isIntegerConstantExpr(InitializerValue, Ctx)) { + Optional OptInitializerValue; + if (!(OptInitializerValue = Initializer->getIntegerConstantExpr(Ctx))) { // Such conversions on variables are always narrowing. return NK_Variable_Narrowing; } + llvm::APSInt &InitializerValue = *OptInitializerValue; bool Narrowing = false; if (FromWidth < ToWidth) { // Negative -> unsigned is narrowing. Otherwise, more bits is never @@ -2183,21 +2184,22 @@ bool Sema::IsIntegralPromotion(Expr *From, QualType FromType, QualType ToType) { // compatibility. if (From) { if (FieldDecl *MemberDecl = From->getSourceBitField()) { - llvm::APSInt BitWidth; + Optional BitWidth; if (FromType->isIntegralType(Context) && - MemberDecl->getBitWidth()->isIntegerConstantExpr(BitWidth, Context)) { - llvm::APSInt ToSize(BitWidth.getBitWidth(), BitWidth.isUnsigned()); + (BitWidth = + MemberDecl->getBitWidth()->getIntegerConstantExpr(Context))) { + llvm::APSInt ToSize(BitWidth->getBitWidth(), BitWidth->isUnsigned()); ToSize = Context.getTypeSize(ToType); // Are we promoting to an int from a bitfield that fits in an int? 
- if (BitWidth < ToSize || - (FromType->isSignedIntegerType() && BitWidth <= ToSize)) { + if (*BitWidth < ToSize || + (FromType->isSignedIntegerType() && *BitWidth <= ToSize)) { return To->getKind() == BuiltinType::Int; } // Are we promoting to an unsigned int from an unsigned bitfield // that fits into an unsigned int? - if (FromType->isUnsignedIntegerType() && BitWidth <= ToSize) { + if (FromType->isUnsignedIntegerType() && *BitWidth <= ToSize) { return To->getKind() == BuiltinType::UInt; } diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index e9d3c755eb232..c7b97ec4d975a 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -335,15 +335,15 @@ static Attr *handleOpenCLUnrollHint(Sema &S, Stmt *St, const ParsedAttr &A, if (NumArgs == 1) { Expr *E = A.getArgAsExpr(0); - llvm::APSInt ArgVal(32); + Optional ArgVal; - if (!E->isIntegerConstantExpr(ArgVal, S.Context)) { + if (!(ArgVal = E->getIntegerConstantExpr(S.Context))) { S.Diag(A.getLoc(), diag::err_attribute_argument_type) << A << AANT_ArgumentIntegerConstant << E->getSourceRange(); return nullptr; } - int Val = ArgVal.getSExtValue(); + int Val = ArgVal->getSExtValue(); if (Val <= 0) { S.Diag(A.getRange().getBegin(), diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index b8f7f1a581590..13426cbf2db40 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -2476,8 +2476,8 @@ QualType Sema::BuildVectorType(QualType CurType, Expr *SizeExpr, return Context.getDependentVectorType(CurType, SizeExpr, AttrLoc, VectorType::GenericVector); - llvm::APSInt VecSize(32); - if (!SizeExpr->isIntegerConstantExpr(VecSize, Context)) { + Optional VecSize = SizeExpr->getIntegerConstantExpr(Context); + if (!VecSize) { Diag(AttrLoc, diag::err_attribute_argument_type) << "vector_size" << AANT_ArgumentIntegerConstant << SizeExpr->getSourceRange(); @@ -2489,13 +2489,13 @@ QualType Sema::BuildVectorType(QualType CurType, Expr 
*SizeExpr, VectorType::GenericVector); // vecSize is specified in bytes - convert to bits. - if (!VecSize.isIntN(61)) { + if (!VecSize->isIntN(61)) { // Bit size will overflow uint64. Diag(AttrLoc, diag::err_attribute_size_too_large) << SizeExpr->getSourceRange() << "vector"; return QualType(); } - uint64_t VectorSizeBits = VecSize.getZExtValue() * 8; + uint64_t VectorSizeBits = VecSize->getZExtValue() * 8; unsigned TypeSize = static_cast(Context.getTypeSize(CurType)); if (VectorSizeBits == 0) { @@ -2540,8 +2540,8 @@ QualType Sema::BuildExtVectorType(QualType T, Expr *ArraySize, } if (!ArraySize->isTypeDependent() && !ArraySize->isValueDependent()) { - llvm::APSInt vecSize(32); - if (!ArraySize->isIntegerConstantExpr(vecSize, Context)) { + Optional vecSize = ArraySize->getIntegerConstantExpr(Context); + if (!vecSize) { Diag(AttrLoc, diag::err_attribute_argument_type) << "ext_vector_type" << AANT_ArgumentIntegerConstant << ArraySize->getSourceRange(); @@ -2555,7 +2555,7 @@ QualType Sema::BuildExtVectorType(QualType T, Expr *ArraySize, } // Unlike gcc's vector_size attribute, the size is specified as the // number of elements, not the number of bytes. - unsigned vectorSize = static_cast(vecSize.getZExtValue()); + unsigned vectorSize = static_cast(vecSize->getZExtValue()); if (vectorSize == 0) { Diag(AttrLoc, diag::err_attribute_zero_size) @@ -6254,13 +6254,15 @@ static bool BuildAddressSpaceIndex(Sema &S, LangAS &ASIdx, const Expr *AddrSpace, SourceLocation AttrLoc) { if (!AddrSpace->isValueDependent()) { - llvm::APSInt addrSpace(32); - if (!AddrSpace->isIntegerConstantExpr(addrSpace, S.Context)) { + Optional OptAddrSpace = + AddrSpace->getIntegerConstantExpr(S.Context); + if (!OptAddrSpace) { S.Diag(AttrLoc, diag::err_attribute_argument_type) << "'address_space'" << AANT_ArgumentIntegerConstant << AddrSpace->getSourceRange(); return false; } + llvm::APSInt &addrSpace = *OptAddrSpace; // Bounds checking. 
if (addrSpace.isSigned()) { @@ -7712,9 +7714,9 @@ static void HandleNeonVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr, } // The number of elements must be an ICE. Expr *numEltsExpr = static_cast(Attr.getArgAsExpr(0)); - llvm::APSInt numEltsInt(32); + Optional numEltsInt; if (numEltsExpr->isTypeDependent() || numEltsExpr->isValueDependent() || - !numEltsExpr->isIntegerConstantExpr(numEltsInt, S.Context)) { + !(numEltsInt = numEltsExpr->getIntegerConstantExpr(S.Context))) { S.Diag(Attr.getLoc(), diag::err_attribute_argument_type) << Attr << AANT_ArgumentIntegerConstant << numEltsExpr->getSourceRange(); @@ -7730,7 +7732,7 @@ static void HandleNeonVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr, // The total size of the vector must be 64 or 128 bits. unsigned typeSize = static_cast(S.Context.getTypeSize(CurType)); - unsigned numElts = static_cast(numEltsInt.getZExtValue()); + unsigned numElts = static_cast(numEltsInt->getZExtValue()); unsigned vecSize = typeSize * numElts; if (vecSize != 64 && vecSize != 128) { S.Diag(Attr.getLoc(), diag::err_attribute_bad_neon_vector_size) << CurType; From c94332919bd922032e979b3ae3ced5ca5bdf9650 Mon Sep 17 00:00:00 2001 From: David Blaikie Date: Sun, 12 Jul 2020 20:29:19 -0700 Subject: [PATCH 063/771] Revert "Rename/refactor isIntegerConstantExpression to getIntegerConstantExpression" Broke buildbots since I hadn't updated this patch in a while. Sorry for the noise. This reverts commit 49e5f603d40083dce9c05796e3cde3a185c3beba. 
--- clang/include/clang/AST/Expr.h | 13 ++-- clang/lib/AST/ASTContext.cpp | 16 +++-- clang/lib/AST/ExprConstant.cpp | 19 ++--- clang/lib/AST/MicrosoftMangle.cpp | 6 +- clang/lib/CodeGen/CGBuiltin.cpp | 114 +++++++++++++++++------------- clang/lib/CodeGen/CGExpr.cpp | 16 ++--- clang/lib/Sema/SemaAttr.cpp | 12 ++-- clang/lib/Sema/SemaChecking.cpp | 107 +++++++++++++++------------- clang/lib/Sema/SemaDecl.cpp | 8 +-- clang/lib/Sema/SemaDeclAttr.cpp | 77 ++++++++++---------- clang/lib/Sema/SemaExprCXX.cpp | 14 ++-- clang/lib/Sema/SemaOpenMP.cpp | 66 ++++++++--------- clang/lib/Sema/SemaOverload.cpp | 30 ++++---- clang/lib/Sema/SemaStmtAttr.cpp | 6 +- clang/lib/Sema/SemaType.cpp | 26 ++++--- 15 files changed, 275 insertions(+), 255 deletions(-) diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h index a42c7bb5a9f26..66eafaaab715e 100644 --- a/clang/include/clang/AST/Expr.h +++ b/clang/include/clang/AST/Expr.h @@ -510,15 +510,16 @@ class Expr : public ValueStmt { /// semantically correspond to a bool. bool isKnownToHaveBooleanValue(bool Semantic = true) const; - /// isIntegerConstantExpr - Return the value if this expression is a valid - /// integer constant expression. If not a valid i-c-e, return None and fill - /// in Loc (if specified) with the location of the invalid expression. + /// isIntegerConstantExpr - Return true if this expression is a valid integer + /// constant expression, and, if so, return its value in Result. If not a + /// valid i-c-e, return false and fill in Loc (if specified) with the location + /// of the invalid expression. /// /// Note: This does not perform the implicit conversions required by C++11 /// [expr.const]p5. 
- Optional getIntegerConstantExpr(const ASTContext &Ctx, - SourceLocation *Loc = nullptr, - bool isEvaluated = true) const; + bool isIntegerConstantExpr(llvm::APSInt &Result, const ASTContext &Ctx, + SourceLocation *Loc = nullptr, + bool isEvaluated = true) const; bool isIntegerConstantExpr(const ASTContext &Ctx, SourceLocation *Loc = nullptr) const; diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 807028885652d..2ba643f12a82f 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -9471,15 +9471,17 @@ QualType ASTContext::mergeTypes(QualType LHS, QualType RHS, const ConstantArrayType* CAT) -> std::pair { if (VAT) { - Optional TheInt; + llvm::APSInt TheInt; Expr *E = VAT->getSizeExpr(); - if (E && (TheInt = E->getIntegerConstantExpr(*this))) - return std::make_pair(true, *TheInt); - return std::make_pair(false, llvm::APSInt()); + if (E && E->isIntegerConstantExpr(TheInt, *this)) + return std::make_pair(true, TheInt); + else + return std::make_pair(false, TheInt); + } else if (CAT) { + return std::make_pair(true, CAT->getSize()); + } else { + return std::make_pair(false, llvm::APInt()); } - if (CAT) - return std::make_pair(true, CAT->getSize()); - return std::make_pair(false, llvm::APInt()); }; bool HaveLSize, HaveRSize; diff --git a/clang/lib/AST/ExprConstant.cpp b/clang/lib/AST/ExprConstant.cpp index 011dc890496d0..a4dc0ccad1e0f 100644 --- a/clang/lib/AST/ExprConstant.cpp +++ b/clang/lib/AST/ExprConstant.cpp @@ -14883,22 +14883,16 @@ bool Expr::isIntegerConstantExpr(const ASTContext &Ctx, return true; } -Optional Expr::getIntegerConstantExpr(const ASTContext &Ctx, - SourceLocation *Loc, - bool isEvaluated) const { +bool Expr::isIntegerConstantExpr(llvm::APSInt &Value, const ASTContext &Ctx, + SourceLocation *Loc, bool isEvaluated) const { assert(!isValueDependent() && "Expression evaluator can't be called on a dependent expression."); - APSInt Value; - - if (Ctx.getLangOpts().CPlusPlus11) { - if 
(EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, &Value, Loc)) - return Value; - return None; - } + if (Ctx.getLangOpts().CPlusPlus11) + return EvaluateCPlusPlus11IntegralConstantExpr(Ctx, this, &Value, Loc); if (!isIntegerConstantExpr(Ctx, Loc)) - return None; + return false; // The only possible side-effects here are due to UB discovered in the // evaluation (for instance, INT_MAX + 1). In such a case, we are still @@ -14912,7 +14906,8 @@ Optional Expr::getIntegerConstantExpr(const ASTContext &Ctx, if (!::EvaluateAsInt(this, ExprResult, Ctx, SE_AllowSideEffects, Info)) llvm_unreachable("ICE cannot be evaluated!"); - return ExprResult.Val.getInt(); + Value = ExprResult.Val.getInt(); + return true; } bool Expr::isCXX98IntegralConstantExpr(const ASTContext &Ctx) const { diff --git a/clang/lib/AST/MicrosoftMangle.cpp b/clang/lib/AST/MicrosoftMangle.cpp index 09579c28061ae..529f301e46964 100644 --- a/clang/lib/AST/MicrosoftMangle.cpp +++ b/clang/lib/AST/MicrosoftMangle.cpp @@ -1372,9 +1372,9 @@ void MicrosoftCXXNameMangler::mangleIntegerLiteral(const llvm::APSInt &Value, void MicrosoftCXXNameMangler::mangleExpression(const Expr *E) { // See if this is a constant expression. - if (Optional Value = - E->getIntegerConstantExpr(Context.getASTContext())) { - mangleIntegerLiteral(*Value, E->getType()->isBooleanType()); + llvm::APSInt Value; + if (E->isIntegerConstantExpr(Value, Context.getASTContext())) { + mangleIntegerLiteral(Value, E->getType()->isBooleanType()); return; } diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 3588e33714d2f..35a93a7889f40 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -4419,9 +4419,11 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, } else { // If this is required to be a constant, constant fold it so that we // know that the generated intrinsic gets a ConstantInt. 
- ArgValue = llvm::ConstantInt::get( - getLLVMContext(), - *E->getArg(i)->getIntegerConstantExpr(getContext())); + llvm::APSInt Result; + bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext()); + assert(IsConst && "Constant arg isn't actually constant?"); + (void)IsConst; + ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result); } // If the intrinsic arg type is different from the builtin arg type @@ -5594,14 +5596,13 @@ Value *CodeGenFunction::EmitCommonNeonBuiltinExpr( SmallVectorImpl &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch) { // Get the last argument, which specifies the vector type. + llvm::APSInt NeonTypeConst; const Expr *Arg = E->getArg(E->getNumArgs() - 1); - Optional NeonTypeConst = - Arg->getIntegerConstantExpr(getContext()); - if (!NeonTypeConst) + if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext())) return nullptr; // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type(NeonTypeConst->getZExtValue()); + NeonTypeFlags Type(NeonTypeConst.getZExtValue()); bool Usgn = Type.isUnsigned(); bool Quad = Type.isQuad(); const bool HasLegalHalfType = getTarget().hasLegalHalfType(); @@ -6884,9 +6885,10 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } else { // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. - Ops.push_back(llvm::ConstantInt::get( - getLLVMContext(), - *E->getArg(i)->getIntegerConstantExpr(getContext()))); + llvm::APSInt Result; + bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); + assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; + Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); } } @@ -7097,9 +7099,9 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, // Get the last argument, which specifies the vector type. 
assert(HasExtraArg); + llvm::APSInt Result; const Expr *Arg = E->getArg(E->getNumArgs()-1); - Optional Result = Arg->getIntegerConstantExpr(getContext()); - if (!Result) + if (!Arg->isIntegerConstantExpr(Result, getContext())) return nullptr; if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f || @@ -7112,7 +7114,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, Ty = DoubleTy; // Determine whether this is an unsigned conversion or not. - bool usgn = Result->getZExtValue() == 1; + bool usgn = Result.getZExtValue() == 1; unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr; // Call the appropriate intrinsic. @@ -7121,7 +7123,7 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, } // Determine the type of this overloaded NEON intrinsic. - NeonTypeFlags Type = Result->getZExtValue(); + NeonTypeFlags Type(Result.getZExtValue()); bool usgn = Type.isUnsigned(); bool rightShift = false; @@ -7265,7 +7267,11 @@ Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID, template static Integer GetIntegerConstantValue(const Expr *E, ASTContext &Context) { - return E->getIntegerConstantExpr(Context)->getExtValue(); + llvm::APSInt IntVal; + bool IsConst = E->isIntegerConstantExpr(IntVal, Context); + assert(IsConst && "Sema should have checked this was a constant"); + (void)IsConst; + return IntVal.getExtValue(); } static llvm::Value *SignOrZeroExtend(CGBuilderTy &Builder, llvm::Value *V, @@ -7538,13 +7544,13 @@ static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID assert(E->getNumArgs() >= 3); // Get the last argument, which specifies the vector type. + llvm::APSInt Result; const Expr *Arg = E->getArg(E->getNumArgs() - 1); - Optional Result = Arg->getIntegerConstantExpr(CGF.getContext()); - if (!Result) + if (!Arg->isIntegerConstantExpr(Result, CGF.getContext())) return nullptr; // Determine the type of this overloaded NEON intrinsic. 
- NeonTypeFlags Type = Result->getZExtValue(); + NeonTypeFlags Type(Result.getZExtValue()); llvm::VectorType *Ty = GetNeonType(&CGF, Type); if (!Ty) return nullptr; @@ -8930,9 +8936,11 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, } else { // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. - Ops.push_back(llvm::ConstantInt::get( - getLLVMContext(), - *E->getArg(i)->getIntegerConstantExpr(getContext()))); + llvm::APSInt Result; + bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); + assert(IsConst && "Constant arg isn't actually constant?"); + (void)IsConst; + Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); } } @@ -8947,11 +8955,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID, return Result; } + llvm::APSInt Result; const Expr *Arg = E->getArg(E->getNumArgs()-1); NeonTypeFlags Type(0); - if (Optional Result = Arg->getIntegerConstantExpr(getContext())) + if (Arg->isIntegerConstantExpr(Result, getContext())) // Determine the type of this overloaded NEON intrinsic. - Type = NeonTypeFlags(Result->getZExtValue()); + Type = NeonTypeFlags(Result.getZExtValue()); bool usgn = Type.isUnsigned(); bool quad = Type.isQuad(); @@ -11782,8 +11791,10 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, // If this is required to be a constant, constant fold it so that we know // that the generated intrinsic gets a ConstantInt. 
- Ops.push_back(llvm::ConstantInt::get( - getLLVMContext(), *E->getArg(i)->getIntegerConstantExpr(getContext()))); + llvm::APSInt Result; + bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext()); + assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst; + Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result)); } // These exist so that the builtin that takes an immediate can be bounds @@ -15062,8 +15073,11 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, llvm::Type *ResultType = ConvertType(E->getType()); Value *X = EmitScalarExpr(E->getArg(0)); // Constant-fold the M4 and M5 mask arguments. - llvm::APSInt M4 = *E->getArg(1)->getIntegerConstantExpr(getContext()); - llvm::APSInt M5 = *E->getArg(2)->getIntegerConstantExpr(getContext()); + llvm::APSInt M4, M5; + bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext()); + bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext()); + assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?"); + (void)IsConstM4; (void)IsConstM5; // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some combinations of M4 and M5. Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -15118,7 +15132,10 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); // Constant-fold the M4 mask argument. - llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext()); + llvm::APSInt M4; + bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); + assert(IsConstM4 && "Constant arg isn't actually constant?"); + (void)IsConstM4; // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some values of M4. 
Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -15152,7 +15169,10 @@ Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID, Value *X = EmitScalarExpr(E->getArg(0)); Value *Y = EmitScalarExpr(E->getArg(1)); // Constant-fold the M4 mask argument. - llvm::APSInt M4 = *E->getArg(2)->getIntegerConstantExpr(getContext()); + llvm::APSInt M4; + bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext()); + assert(IsConstM4 && "Constant arg isn't actually constant?"); + (void)IsConstM4; // Check whether this instance can be represented via a LLVM standard // intrinsic. We only support some values of M4. Intrinsic::ID ID = Intrinsic::not_intrinsic; @@ -15819,11 +15839,10 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Address Dst = EmitPointerWithAlignment(E->getArg(0)); Value *Src = EmitScalarExpr(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); - Optional isColMajorArg = - E->getArg(3)->getIntegerConstantExpr(getContext()); - if (!isColMajorArg) + llvm::APSInt isColMajorArg; + if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) return nullptr; - bool isColMajor = isColMajorArg->getSExtValue(); + bool isColMajor = isColMajorArg.getSExtValue(); NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); unsigned IID = isColMajor ? 
II.IID_col : II.IID_row; if (IID == 0) @@ -15864,11 +15883,10 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Value *Dst = EmitScalarExpr(E->getArg(0)); Address Src = EmitPointerWithAlignment(E->getArg(1)); Value *Ldm = EmitScalarExpr(E->getArg(2)); - Optional isColMajorArg = - E->getArg(3)->getIntegerConstantExpr(getContext()); - if (!isColMajorArg) + llvm::APSInt isColMajorArg; + if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext())) return nullptr; - bool isColMajor = isColMajorArg->getSExtValue(); + bool isColMajor = isColMajorArg.getSExtValue(); NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID); unsigned IID = isColMajor ? II.IID_col : II.IID_row; if (IID == 0) @@ -15915,20 +15933,16 @@ CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) { Address SrcA = EmitPointerWithAlignment(E->getArg(1)); Address SrcB = EmitPointerWithAlignment(E->getArg(2)); Address SrcC = EmitPointerWithAlignment(E->getArg(3)); - Optional LayoutArg = - E->getArg(4)->getIntegerConstantExpr(getContext()); - if (!LayoutArg) + llvm::APSInt LayoutArg; + if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext())) return nullptr; - int Layout = LayoutArg->getSExtValue(); + int Layout = LayoutArg.getSExtValue(); if (Layout < 0 || Layout > 3) return nullptr; llvm::APSInt SatfArg; if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1) SatfArg = 0; // .b1 does not have satf argument. 
- else if (Optional OptSatfArg = - E->getArg(5)->getIntegerConstantExpr(getContext())) - SatfArg = *OptSatfArg; - else + else if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext())) return nullptr; bool Satf = SatfArg.getSExtValue(); NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID); @@ -16257,8 +16271,9 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_extract_lane_i64x2: case WebAssembly::BI__builtin_wasm_extract_lane_f32x4: case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: { - llvm::APSInt LaneConst = - *E->getArg(1)->getIntegerConstantExpr(getContext()); + llvm::APSInt LaneConst; + if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); Value *Vec = EmitScalarExpr(E->getArg(0)); Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst); Value *Extract = Builder.CreateExtractElement(Vec, Lane); @@ -16284,8 +16299,9 @@ Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, case WebAssembly::BI__builtin_wasm_replace_lane_i64x2: case WebAssembly::BI__builtin_wasm_replace_lane_f32x4: case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: { - llvm::APSInt LaneConst = - *E->getArg(1)->getIntegerConstantExpr(getContext()); + llvm::APSInt LaneConst; + if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext())) + llvm_unreachable("Constant arg isn't actually constant?"); Value *Vec = EmitScalarExpr(E->getArg(0)); Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst); Value *Val = EmitScalarExpr(E->getArg(2)); diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index ab29e32929ceb..9e8770573d701 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -3868,17 +3868,15 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, llvm::APSInt ConstLength; if (Length) { // Idx = LowerBound + Length - 1; - if (Optional CL = 
Length->getIntegerConstantExpr(C)) { - ConstLength = CL->zextOrTrunc(PointerWidthInBits); + if (Length->isIntegerConstantExpr(ConstLength, C)) { + ConstLength = ConstLength.zextOrTrunc(PointerWidthInBits); Length = nullptr; } auto *LowerBound = E->getLowerBound(); llvm::APSInt ConstLowerBound(PointerWidthInBits, /*isUnsigned=*/false); - if (LowerBound) { - if (Optional LB = LowerBound->getIntegerConstantExpr(C)) { - ConstLowerBound = LB->zextOrTrunc(PointerWidthInBits); - LowerBound = nullptr; - } + if (LowerBound && LowerBound->isIntegerConstantExpr(ConstLowerBound, C)) { + ConstLowerBound = ConstLowerBound.zextOrTrunc(PointerWidthInBits); + LowerBound = nullptr; } if (!Length) --ConstLength; @@ -3915,10 +3913,8 @@ LValue CodeGenFunction::EmitOMPArraySectionExpr(const OMPArraySectionExpr *E, : BaseTy; if (auto *VAT = C.getAsVariableArrayType(ArrayTy)) { Length = VAT->getSizeExpr(); - if (Optional L = Length->getIntegerConstantExpr(C)) { - ConstLength = *L; + if (Length->isIntegerConstantExpr(ConstLength, C)) Length = nullptr; - } } else { auto *CAT = C.getAsConstantArrayType(ArrayTy); ConstLength = CAT->getSize(); diff --git a/clang/lib/Sema/SemaAttr.cpp b/clang/lib/Sema/SemaAttr.cpp index f9785e4bea5e2..b354e810974c4 100644 --- a/clang/lib/Sema/SemaAttr.cpp +++ b/clang/lib/Sema/SemaAttr.cpp @@ -300,18 +300,20 @@ void Sema::ActOnPragmaPack(SourceLocation PragmaLoc, PragmaMsStackAction Action, // If specified then alignment must be a "small" power of two. unsigned AlignmentVal = 0; if (Alignment) { - Optional Val; + llvm::APSInt Val; // pack(0) is like pack(), which just works out since that is what // we use 0 for in PackAttr. 
- if (Alignment->isTypeDependent() || Alignment->isValueDependent() || - !(Val = Alignment->getIntegerConstantExpr(Context)) || - !(*Val == 0 || Val->isPowerOf2()) || Val->getZExtValue() > 16) { + if (Alignment->isTypeDependent() || + Alignment->isValueDependent() || + !Alignment->isIntegerConstantExpr(Val, Context) || + !(Val == 0 || Val.isPowerOf2()) || + Val.getZExtValue() > 16) { Diag(PragmaLoc, diag::warn_pragma_pack_invalid_alignment); return; // Ignore } - AlignmentVal = (unsigned)Val->getZExtValue(); + AlignmentVal = (unsigned) Val.getZExtValue(); } if (Action == Sema::PSK_Show) { // Show the current alignment, making sure to show the right value diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index c501c706a97bc..efaf36a693061 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -2284,7 +2284,10 @@ bool Sema::CheckARMCoprocessorImmediate(const TargetInfo &TI, if (CoprocArg->isTypeDependent() || CoprocArg->isValueDependent()) return false; - llvm::APSInt CoprocNoAP = *CoprocArg->getIntegerConstantExpr(Context); + llvm::APSInt CoprocNoAP; + bool IsICE = CoprocArg->isIntegerConstantExpr(CoprocNoAP, Context); + (void)IsICE; + assert(IsICE && "Coprocossor immediate is not a constant expression"); int64_t CoprocNo = CoprocNoAP.getExtValue(); assert(CoprocNo >= 0 && "Coprocessor immediate must be non-negative"); @@ -2596,7 +2599,8 @@ bool Sema::CheckBPFBuiltinFunctionCall(unsigned BuiltinID, // The second argument needs to be a constant int Arg = TheCall->getArg(1); - if (!Arg->isIntegerConstantExpr(Context)) { + llvm::APSInt Value; + if (!Arg->isIntegerConstantExpr(Value, Context)) { Diag(Arg->getBeginLoc(), diag::err_preserve_field_info_not_const) << 2 << Arg->getSourceRange(); return true; @@ -3194,10 +3198,11 @@ bool Sema::CheckSystemZBuiltinFunctionCall(unsigned BuiltinID, CallExpr *TheCall) { if (BuiltinID == SystemZ::BI__builtin_tabort) { Expr *Arg = TheCall->getArg(0); - if (Optional 
AbortCode = Arg->getIntegerConstantExpr(Context)) - if (AbortCode->getSExtValue() >= 0 && AbortCode->getSExtValue() < 256) - return Diag(Arg->getBeginLoc(), diag::err_systemz_invalid_tabort_code) - << Arg->getSourceRange(); + llvm::APSInt AbortCode(32); + if (Arg->isIntegerConstantExpr(AbortCode, Context) && + AbortCode.getSExtValue() >= 0 && AbortCode.getSExtValue() < 256) + return Diag(Arg->getBeginLoc(), diag::err_systemz_invalid_tabort_code) + << Arg->getSourceRange(); } // For intrinsics which take an immediate value as part of the instruction, @@ -4918,21 +4923,21 @@ ExprResult Sema::BuildAtomicExpr(SourceRange CallRange, SourceRange ExprRange, } if (SubExprs.size() >= 2 && Form != Init) { - if (Optional Result = - SubExprs[1]->getIntegerConstantExpr(Context)) - if (!isValidOrderingForOp(Result->getSExtValue(), Op)) - Diag(SubExprs[1]->getBeginLoc(), - diag::warn_atomic_op_has_invalid_memory_order) - << SubExprs[1]->getSourceRange(); + llvm::APSInt Result(32); + if (SubExprs[1]->isIntegerConstantExpr(Result, Context) && + !isValidOrderingForOp(Result.getSExtValue(), Op)) + Diag(SubExprs[1]->getBeginLoc(), + diag::warn_atomic_op_has_invalid_memory_order) + << SubExprs[1]->getSourceRange(); } if (auto ScopeModel = AtomicExpr::getScopeModel(Op)) { auto *Scope = Args[Args.size() - 1]; - if (Optional Result = - Scope->getIntegerConstantExpr(Context)) { - if (!ScopeModel->isValid(Result->getZExtValue())) - Diag(Scope->getBeginLoc(), diag::err_atomic_op_has_invalid_synch_scope) - << Scope->getSourceRange(); + llvm::APSInt Result(32); + if (Scope->isIntegerConstantExpr(Result, Context) && + !ScopeModel->isValid(Result.getZExtValue())) { + Diag(Scope->getBeginLoc(), diag::err_atomic_op_has_invalid_synch_scope) + << Scope->getSourceRange(); } SubExprs.push_back(Scope); } @@ -5800,7 +5805,8 @@ bool Sema::SemaBuiltinVSX(CallExpr *TheCall) { << TheCall->getSourceRange(); // Check the third argument is a compile time constant - if 
(!TheCall->getArg(2)->isIntegerConstantExpr(Context)) + llvm::APSInt Value; + if(!TheCall->getArg(2)->isIntegerConstantExpr(Value, Context)) return Diag(TheCall->getBeginLoc(), diag::err_vsx_builtin_nonconstant_argument) << 3 /* argument index */ << TheCall->getDirectCallee() @@ -5895,18 +5901,17 @@ ExprResult Sema::SemaBuiltinShuffleVector(CallExpr *TheCall) { TheCall->getArg(i)->isValueDependent()) continue; - Optional Result; - if (!(Result = TheCall->getArg(i)->getIntegerConstantExpr(Context))) + llvm::APSInt Result(32); + if (!TheCall->getArg(i)->isIntegerConstantExpr(Result, Context)) return ExprError(Diag(TheCall->getBeginLoc(), diag::err_shufflevector_nonconstant_argument) << TheCall->getArg(i)->getSourceRange()); // Allow -1 which will be translated to undef in the IR. - if (Result->isSigned() && Result->isAllOnesValue()) + if (Result.isSigned() && Result.isAllOnesValue()) continue; - if (Result->getActiveBits() > 64 || - Result->getZExtValue() >= numElements * 2) + if (Result.getActiveBits() > 64 || Result.getZExtValue() >= numElements*2) return ExprError(Diag(TheCall->getBeginLoc(), diag::err_shufflevector_argument_too_large) << TheCall->getArg(i)->getSourceRange()); @@ -6153,11 +6158,10 @@ bool Sema::SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum, if (Arg->isTypeDependent() || Arg->isValueDependent()) return false; - Optional R; - if (!(R = Arg->getIntegerConstantExpr(Context))) + if (!Arg->isIntegerConstantExpr(Result, Context)) return Diag(TheCall->getBeginLoc(), diag::err_constant_integer_arg_type) << FDecl->getDeclName() << Arg->getSourceRange(); - Result = *R; + return false; } @@ -10317,15 +10321,14 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, // If the shift amount is a positive constant, drop the width by // that much. 
- if (Optional shift = - BO->getRHS()->getIntegerConstantExpr(C)) { - if (shift->isNonNegative()) { - unsigned zext = shift->getZExtValue(); - if (zext >= L.Width) - L.Width = (L.NonNegative ? 0 : 1); - else - L.Width -= zext; - } + llvm::APSInt shift; + if (BO->getRHS()->isIntegerConstantExpr(shift, C) && + shift.isNonNegative()) { + unsigned zext = shift.getZExtValue(); + if (zext >= L.Width) + L.Width = (L.NonNegative ? 0 : 1); + else + L.Width -= zext; } return L; @@ -10349,9 +10352,9 @@ static IntRange GetExprRange(ASTContext &C, const Expr *E, unsigned MaxWidth, IntRange L = GetExprRange(C, BO->getLHS(), opWidth, InConstantContext); // If the divisor is constant, use that. - if (Optional divisor = - BO->getRHS()->getIntegerConstantExpr(C)) { - unsigned log2 = divisor->logBase2(); // floor(log_2(divisor)) + llvm::APSInt divisor; + if (BO->getRHS()->isIntegerConstantExpr(divisor, C)) { + unsigned log2 = divisor.logBase2(); // floor(log_2(divisor)) if (log2 >= L.Width) L.Width = (L.NonNegative ? 0 : 1); else @@ -10783,20 +10786,23 @@ static void AnalyzeComparison(Sema &S, BinaryOperator *E) { Expr *RHS = E->getRHS(); if (T->isIntegralType(S.Context)) { - Optional RHSValue = RHS->getIntegerConstantExpr(S.Context); - Optional LHSValue = LHS->getIntegerConstantExpr(S.Context); + llvm::APSInt RHSValue; + llvm::APSInt LHSValue; + + bool IsRHSIntegralLiteral = RHS->isIntegerConstantExpr(RHSValue, S.Context); + bool IsLHSIntegralLiteral = LHS->isIntegerConstantExpr(LHSValue, S.Context); // We don't care about expressions whose result is a constant. - if (RHSValue && LHSValue) + if (IsRHSIntegralLiteral && IsLHSIntegralLiteral) return AnalyzeImpConvsInComparison(S, E); // We only care about expressions where just one side is literal - if ((bool)RHSValue ^ (bool)LHSValue) { + if (IsRHSIntegralLiteral ^ IsLHSIntegralLiteral) { // Is the constant on the RHS or LHS? 
- const bool RhsConstant = (bool)RHSValue; + const bool RhsConstant = IsRHSIntegralLiteral; Expr *Const = RhsConstant ? RHS : LHS; Expr *Other = RhsConstant ? LHS : RHS; - const llvm::APSInt &Value = RhsConstant ? *RHSValue : *LHSValue; + const llvm::APSInt &Value = RhsConstant ? RHSValue : LHSValue; // Check whether an integer constant comparison results in a value // of 'true' or 'false'. @@ -11754,8 +11760,8 @@ static void CheckImplicitConversion(Sema &S, Expr *E, QualType T, if (SourcePrecision > 0 && TargetPrecision > 0 && SourcePrecision > TargetPrecision) { - if (Optional SourceInt = - E->getIntegerConstantExpr(S.Context)) { + llvm::APSInt SourceInt; + if (E->isIntegerConstantExpr(SourceInt, S.Context)) { // If the source integer is a constant, convert it to the target // floating point type. Issue a warning if the value changes // during the whole conversion. @@ -11763,11 +11769,11 @@ static void CheckImplicitConversion(Sema &S, Expr *E, QualType T, S.Context.getFloatTypeSemantics(QualType(TargetBT, 0))); llvm::APFloat::opStatus ConversionStatus = TargetFloatValue.convertFromAPInt( - *SourceInt, SourceBT->isSignedInteger(), + SourceInt, SourceBT->isSignedInteger(), llvm::APFloat::rmNearestTiesToEven); if (ConversionStatus != llvm::APFloat::opOK) { - std::string PrettySourceValue = SourceInt->toString(10); + std::string PrettySourceValue = SourceInt.toString(10); SmallString<32> PrettyTargetValue; TargetFloatValue.toString(PrettyTargetValue, TargetPrecision); @@ -14118,10 +14124,9 @@ namespace { return; if (Expr *RHS = BinOp->getRHS()) { RHS = RHS->IgnoreParenCasts(); - Optional Value; + llvm::APSInt Value; VarWillBeReased = - (RHS && (Value = RHS->getIntegerConstantExpr(Context)) && - *Value == 0); + (RHS && RHS->isIntegerConstantExpr(Value, Context) && Value == 0); } } } diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index dc0f3d68fde38..f5e375134c293 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ 
-13141,20 +13141,20 @@ void Sema::FinalizeDeclaration(Decl *ThisDecl) { if (!MagicValueExpr) { continue; } - Optional MagicValueInt; - if (!(MagicValueInt = MagicValueExpr->getIntegerConstantExpr(Context))) { + llvm::APSInt MagicValueInt; + if (!MagicValueExpr->isIntegerConstantExpr(MagicValueInt, Context)) { Diag(I->getRange().getBegin(), diag::err_type_tag_for_datatype_not_ice) << LangOpts.CPlusPlus << MagicValueExpr->getSourceRange(); continue; } - if (MagicValueInt->getActiveBits() > 64) { + if (MagicValueInt.getActiveBits() > 64) { Diag(I->getRange().getBegin(), diag::err_type_tag_for_datatype_too_large) << LangOpts.CPlusPlus << MagicValueExpr->getSourceRange(); continue; } - uint64_t MagicValue = MagicValueInt->getZExtValue(); + uint64_t MagicValue = MagicValueInt.getZExtValue(); RegisterTypeTagForDatatype(I->getArgumentKind(), MagicValue, I->getMatchingCType(), diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index ece93cbd6a9bd..1a0594512a606 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -240,9 +240,9 @@ template static bool checkUInt32Argument(Sema &S, const AttrInfo &AI, const Expr *Expr, uint32_t &Val, unsigned Idx = UINT_MAX, bool StrictlyUnsigned = false) { - Optional I = llvm::APSInt(32); + llvm::APSInt I(32); if (Expr->isTypeDependent() || Expr->isValueDependent() || - !(I = Expr->getIntegerConstantExpr(S.Context))) { + !Expr->isIntegerConstantExpr(I, S.Context)) { if (Idx != UINT_MAX) S.Diag(getAttrLoc(AI), diag::err_attribute_argument_n_type) << &AI << Idx << AANT_ArgumentIntegerConstant @@ -253,19 +253,19 @@ static bool checkUInt32Argument(Sema &S, const AttrInfo &AI, const Expr *Expr, return false; } - if (!I->isIntN(32)) { + if (!I.isIntN(32)) { S.Diag(Expr->getExprLoc(), diag::err_ice_too_large) - << I->toString(10, false) << 32 << /* Unsigned */ 1; + << I.toString(10, false) << 32 << /* Unsigned */ 1; return false; } - if (StrictlyUnsigned && I->isSigned() && 
I->isNegative()) { + if (StrictlyUnsigned && I.isSigned() && I.isNegative()) { S.Diag(getAttrLoc(AI), diag::err_attribute_requires_positive_integer) << &AI << /*non-negative*/ 1; return false; } - Val = (uint32_t)I->getZExtValue(); + Val = (uint32_t)I.getZExtValue(); return true; } @@ -332,16 +332,16 @@ static bool checkFunctionOrMethodParameterIndex( unsigned NumParams = (HP ? getFunctionOrMethodNumParams(D) : 0) + HasImplicitThisParam; - Optional IdxInt; + llvm::APSInt IdxInt; if (IdxExpr->isTypeDependent() || IdxExpr->isValueDependent() || - !(IdxInt = IdxExpr->getIntegerConstantExpr(S.Context))) { + !IdxExpr->isIntegerConstantExpr(IdxInt, S.Context)) { S.Diag(getAttrLoc(AI), diag::err_attribute_argument_n_type) << &AI << AttrArgNum << AANT_ArgumentIntegerConstant << IdxExpr->getSourceRange(); return false; } - unsigned IdxSource = IdxInt->getLimitedValue(UINT_MAX); + unsigned IdxSource = IdxInt.getLimitedValue(UINT_MAX); if (IdxSource < 1 || (!IV && IdxSource > NumParams)) { S.Diag(getAttrLoc(AI), diag::err_attribute_argument_out_of_bounds) << &AI << AttrArgNum << IdxExpr->getSourceRange(); @@ -1605,8 +1605,8 @@ void Sema::AddAssumeAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E, } if (!E->isValueDependent()) { - Optional I = llvm::APSInt(64); - if (!(I = E->getIntegerConstantExpr(Context))) { + llvm::APSInt I(64); + if (!E->isIntegerConstantExpr(I, Context)) { if (OE) Diag(AttrLoc, diag::err_attribute_argument_n_type) << &TmpAttr << 1 << AANT_ArgumentIntegerConstant @@ -1618,22 +1618,27 @@ void Sema::AddAssumeAlignedAttr(Decl *D, const AttributeCommonInfo &CI, Expr *E, return; } - if (!I->isPowerOf2()) { + if (!I.isPowerOf2()) { Diag(AttrLoc, diag::err_alignment_not_power_of_two) << E->getSourceRange(); return; } - if (*I > Sema::MaximumAlignment) + if (I > Sema::MaximumAlignment) Diag(CI.getLoc(), diag::warn_assume_aligned_too_great) << CI.getRange() << Sema::MaximumAlignment; } - if (OE && !OE->isValueDependent() && 
!OE->isIntegerConstantExpr(Context)) { - Diag(AttrLoc, diag::err_attribute_argument_n_type) - << &TmpAttr << 2 << AANT_ArgumentIntegerConstant - << OE->getSourceRange(); - return; + if (OE) { + if (!OE->isValueDependent()) { + llvm::APSInt I(64); + if (!OE->isIntegerConstantExpr(I, Context)) { + Diag(AttrLoc, diag::err_attribute_argument_n_type) + << &TmpAttr << 2 << AANT_ArgumentIntegerConstant + << OE->getSourceRange(); + return; + } + } } D->addAttr(::new (Context) AssumeAlignedAttr(Context, CI, E, OE)); @@ -2724,36 +2729,36 @@ static void handleSentinelAttr(Sema &S, Decl *D, const ParsedAttr &AL) { unsigned sentinel = (unsigned)SentinelAttr::DefaultSentinel; if (AL.getNumArgs() > 0) { Expr *E = AL.getArgAsExpr(0); - Optional Idx = llvm::APSInt(32); + llvm::APSInt Idx(32); if (E->isTypeDependent() || E->isValueDependent() || - !(Idx = E->getIntegerConstantExpr(S.Context))) { + !E->isIntegerConstantExpr(Idx, S.Context)) { S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type) << AL << 1 << AANT_ArgumentIntegerConstant << E->getSourceRange(); return; } - if (Idx->isSigned() && Idx->isNegative()) { + if (Idx.isSigned() && Idx.isNegative()) { S.Diag(AL.getLoc(), diag::err_attribute_sentinel_less_than_zero) << E->getSourceRange(); return; } - sentinel = Idx->getZExtValue(); + sentinel = Idx.getZExtValue(); } unsigned nullPos = (unsigned)SentinelAttr::DefaultNullPos; if (AL.getNumArgs() > 1) { Expr *E = AL.getArgAsExpr(1); - Optional Idx = llvm::APSInt(32); + llvm::APSInt Idx(32); if (E->isTypeDependent() || E->isValueDependent() || - !(Idx = E->getIntegerConstantExpr(S.Context))) { + !E->isIntegerConstantExpr(Idx, S.Context)) { S.Diag(AL.getLoc(), diag::err_attribute_argument_n_type) << AL << 2 << AANT_ArgumentIntegerConstant << E->getSourceRange(); return; } - nullPos = Idx->getZExtValue(); + nullPos = Idx.getZExtValue(); - if ((Idx->isSigned() && Idx->isNegative()) || nullPos > 1) { + if ((Idx.isSigned() && Idx.isNegative()) || nullPos > 1) { // FIXME: This error 
message could be improved, it would be nice // to say what the bounds actually are. S.Diag(AL.getLoc(), diag::err_attribute_sentinel_not_zero_or_one) @@ -4828,19 +4833,19 @@ static Expr *makeLaunchBoundsArgExpr(Sema &S, Expr *E, if (E->isValueDependent()) return E; - Optional I = llvm::APSInt(64); - if (!(I = E->getIntegerConstantExpr(S.Context))) { + llvm::APSInt I(64); + if (!E->isIntegerConstantExpr(I, S.Context)) { S.Diag(E->getExprLoc(), diag::err_attribute_argument_n_type) << &AL << Idx << AANT_ArgumentIntegerConstant << E->getSourceRange(); return nullptr; } // Make sure we can fit it in 32 bits. - if (!I->isIntN(32)) { - S.Diag(E->getExprLoc(), diag::err_ice_too_large) - << I->toString(10, false) << 32 << /* Unsigned */ 1; + if (!I.isIntN(32)) { + S.Diag(E->getExprLoc(), diag::err_ice_too_large) << I.toString(10, false) + << 32 << /* Unsigned */ 1; return nullptr; } - if (*I < 0) + if (I < 0) S.Diag(E->getExprLoc(), diag::warn_attribute_argument_n_negative) << &AL << Idx << E->getSourceRange(); @@ -5681,18 +5686,18 @@ static void handleMSP430InterruptAttr(Sema &S, Decl *D, const ParsedAttr &AL) { } Expr *NumParamsExpr = static_cast(AL.getArgAsExpr(0)); - Optional NumParams = llvm::APSInt(32); - if (!(NumParams = NumParamsExpr->getIntegerConstantExpr(S.Context))) { + llvm::APSInt NumParams(32); + if (!NumParamsExpr->isIntegerConstantExpr(NumParams, S.Context)) { S.Diag(AL.getLoc(), diag::err_attribute_argument_type) << AL << AANT_ArgumentIntegerConstant << NumParamsExpr->getSourceRange(); return; } // The argument should be in range 0..63. 
- unsigned Num = NumParams->getLimitedValue(255); + unsigned Num = NumParams.getLimitedValue(255); if (Num > 63) { S.Diag(AL.getLoc(), diag::err_attribute_argument_out_of_bounds) - << AL << (int)NumParams->getSExtValue() + << AL << (int)NumParams.getSExtValue() << NumParamsExpr->getSourceRange(); return; } diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index e3aa817c62249..d885920b6c14e 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -2073,29 +2073,29 @@ Sema::BuildCXXNew(SourceRange Range, bool UseGlobal, // per CWG1464. Otherwise, if it's not a constant, we must have an // unparenthesized array type. if (!(*ArraySize)->isValueDependent()) { + llvm::APSInt Value; // We've already performed any required implicit conversion to integer or // unscoped enumeration type. // FIXME: Per CWG1464, we are required to check the value prior to // converting to size_t. This will never find a negative array size in // C++14 onwards, because Value is always unsigned here! 
- if (Optional Value = - (*ArraySize)->getIntegerConstantExpr(Context)) { - if (Value->isSigned() && Value->isNegative()) { + if ((*ArraySize)->isIntegerConstantExpr(Value, Context)) { + if (Value.isSigned() && Value.isNegative()) { return ExprError(Diag((*ArraySize)->getBeginLoc(), diag::err_typecheck_negative_array_size) << (*ArraySize)->getSourceRange()); } if (!AllocType->isDependentType()) { - unsigned ActiveSizeBits = ConstantArrayType::getNumAddressingBits( - Context, AllocType, *Value); + unsigned ActiveSizeBits = + ConstantArrayType::getNumAddressingBits(Context, AllocType, Value); if (ActiveSizeBits > ConstantArrayType::getMaxSizeBits(Context)) return ExprError( Diag((*ArraySize)->getBeginLoc(), diag::err_array_too_large) - << Value->toString(10) << (*ArraySize)->getSourceRange()); + << Value.toString(10) << (*ArraySize)->getSourceRange()); } - KnownArraySize = Value->getZExtValue(); + KnownArraySize = Value.getZExtValue(); } else if (TypeIdParens.isValid()) { // Can't have dynamic array size when the type-id is in parentheses. Diag((*ArraySize)->getBeginLoc(), diag::ext_new_paren_array_nonconst) diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp index d1ddf10724172..b27abb54c170f 100644 --- a/clang/lib/Sema/SemaOpenMP.cpp +++ b/clang/lib/Sema/SemaOpenMP.cpp @@ -5989,7 +5989,8 @@ Sema::checkOpenMPDeclareVariantFunction(Sema::DeclGroupPtrTy DG, // Deal with non-constant score and user condition expressions. auto HandleNonConstantScoresAndConditions = [this](Expr *&E, bool IsScore) -> bool { - if (!E || E->isIntegerConstantExpr(Context)) + llvm::APSInt Result; + if (!E || E->isIntegerConstantExpr(Result, Context)) return false; if (IsScore) { @@ -6475,14 +6476,14 @@ bool OpenMPIterationSpaceChecker::setStep(Expr *NewStep, bool Subtract) { // loop. If test-expr is of form b relational-op var and relational-op is // > or >= then incr-expr must cause var to increase on each iteration of // the loop. 
- Optional Result = - NewStep->getIntegerConstantExpr(SemaRef.Context); + llvm::APSInt Result; + bool IsConstant = NewStep->isIntegerConstantExpr(Result, SemaRef.Context); bool IsUnsigned = !NewStep->getType()->hasSignedIntegerRepresentation(); bool IsConstNeg = - Result && Result->isSigned() && (Subtract != Result->isNegative()); + IsConstant && Result.isSigned() && (Subtract != Result.isNegative()); bool IsConstPos = - Result && Result->isSigned() && (Subtract == Result->isNegative()); - bool IsConstZero = Result && !Result->getBoolValue(); + IsConstant && Result.isSigned() && (Subtract == Result.isNegative()); + bool IsConstZero = IsConstant && !Result.getBoolValue(); // != with increment is treated as <; != with decrement is treated as > if (!TestIsLessOp.hasValue()) @@ -7913,9 +7914,9 @@ static ExprResult widenIterationCount(unsigned Bits, Expr *E, Sema &SemaRef) { static bool fitsInto(unsigned Bits, bool Signed, const Expr *E, Sema &SemaRef) { if (E == nullptr) return false; - if (Optional Result = - E->getIntegerConstantExpr(SemaRef.Context)) - return Signed ? Result->isSignedIntN(Bits) : Result->isIntN(Bits); + llvm::APSInt Result; + if (E->isIntegerConstantExpr(Result, SemaRef.Context)) + return Signed ? Result.isSignedIntN(Bits) : Result.isIntN(Bits); return false; } @@ -8188,7 +8189,9 @@ checkOpenMPLoop(OpenMPDirectiveKind DKind, Expr *CollapseLoopCountExpr, // Calculate the last iteration number beforehand instead of doing this on // each iteration. Do not do this if the number of iterations may be kfold-ed. 
- bool IsConstant = LastIteration.get()->isIntegerConstantExpr(SemaRef.Context); + llvm::APSInt Result; + bool IsConstant = + LastIteration.get()->isIntegerConstantExpr(Result, SemaRef.Context); ExprResult CalcLastIteration; if (!IsConstant) { ExprResult SaveRef = @@ -12579,16 +12582,15 @@ isNonNegativeIntegerValue(Expr *&ValExpr, Sema &SemaRef, OpenMPClauseKind CKind, ValExpr = Value.get(); // The expression must evaluate to a non-negative integer value. - if (Optional Result = - ValExpr->getIntegerConstantExpr(SemaRef.Context)) { - if (Result->isSigned() && - !((!StrictlyPositive && Result->isNonNegative()) || - (StrictlyPositive && Result->isStrictlyPositive()))) { - SemaRef.Diag(Loc, diag::err_omp_negative_expression_in_clause) - << getOpenMPClauseName(CKind) << (StrictlyPositive ? 1 : 0) - << ValExpr->getSourceRange(); - return false; - } + llvm::APSInt Result; + if (ValExpr->isIntegerConstantExpr(Result, SemaRef.Context) && + Result.isSigned() && + !((!StrictlyPositive && Result.isNonNegative()) || + (StrictlyPositive && Result.isStrictlyPositive()))) { + SemaRef.Diag(Loc, diag::err_omp_negative_expression_in_clause) + << getOpenMPClauseName(CKind) << (StrictlyPositive ? 1 : 0) + << ValExpr->getSourceRange(); + return false; } if (!BuildCapture) return true; @@ -13213,9 +13215,9 @@ OMPClause *Sema::ActOnOpenMPScheduleClause( // OpenMP [2.7.1, Restrictions] // chunk_size must be a loop invariant integer expression with a positive // value. 
- if (Optional Result = - ValExpr->getIntegerConstantExpr(Context)) { - if (Result->isSigned() && !Result->isStrictlyPositive()) { + llvm::APSInt Result; + if (ValExpr->isIntegerConstantExpr(Result, Context)) { + if (Result.isSigned() && !Result.isStrictlyPositive()) { Diag(ChunkSizeLoc, diag::err_omp_negative_expression_in_clause) << "schedule" << 1 << ChunkSize->getSourceRange(); return nullptr; @@ -15686,12 +15688,12 @@ OMPClause *Sema::ActOnOpenMPLinearClause( // Warn about zero linear step (it would be probably better specified as // making corresponding variables 'const'). - if (Optional Result = - StepExpr->getIntegerConstantExpr(Context)) { - if (!Result->isNegative() && !Result->isStrictlyPositive()) - Diag(StepLoc, diag::warn_omp_linear_step_zero) - << Vars[0] << (Vars.size() > 1); - } else if (CalcStep.isUsable()) { + llvm::APSInt Result; + bool IsConstant = StepExpr->isIntegerConstantExpr(Result, Context); + if (IsConstant && !Result.isNegative() && !Result.isStrictlyPositive()) + Diag(StepLoc, diag::warn_omp_linear_step_zero) << Vars[0] + << (Vars.size() > 1); + if (!IsConstant && CalcStep.isUsable()) { // Calculate the step beforehand instead of doing this on each iteration. // (This is not used if the number of iterations may be kfold-ed). CalcStepExpr = CalcStep.get(); @@ -18223,9 +18225,9 @@ OMPClause *Sema::ActOnOpenMPDistScheduleClause( // OpenMP [2.7.1, Restrictions] // chunk_size must be a loop invariant integer expression with a positive // value. 
- if (Optional Result = - ValExpr->getIntegerConstantExpr(Context)) { - if (Result->isSigned() && !Result->isStrictlyPositive()) { + llvm::APSInt Result; + if (ValExpr->isIntegerConstantExpr(Result, Context)) { + if (Result.isSigned() && !Result.isStrictlyPositive()) { Diag(ChunkSizeLoc, diag::err_omp_negative_expression_in_clause) << "dist_schedule" << ChunkSize->getSourceRange(); return nullptr; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 7c6acf011d574..8635397f48067 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -346,6 +346,7 @@ NarrowingKind StandardConversionSequence::getNarrowingKind( ToType->isRealFloatingType()) { if (IgnoreFloatToIntegralConversion) return NK_Not_Narrowing; + llvm::APSInt IntConstantValue; const Expr *Initializer = IgnoreNarrowingConversion(Ctx, Converted); assert(Initializer && "Unknown conversion expression"); @@ -353,20 +354,19 @@ NarrowingKind StandardConversionSequence::getNarrowingKind( if (Initializer->isValueDependent()) return NK_Dependent_Narrowing; - if (Optional IntConstantValue = - Initializer->getIntegerConstantExpr(Ctx)) { + if (Initializer->isIntegerConstantExpr(IntConstantValue, Ctx)) { // Convert the integer to the floating type. llvm::APFloat Result(Ctx.getFloatTypeSemantics(ToType)); - Result.convertFromAPInt(*IntConstantValue, IntConstantValue->isSigned(), + Result.convertFromAPInt(IntConstantValue, IntConstantValue.isSigned(), llvm::APFloat::rmNearestTiesToEven); // And back. - llvm::APSInt ConvertedValue = *IntConstantValue; + llvm::APSInt ConvertedValue = IntConstantValue; bool ignored; Result.convertToInteger(ConvertedValue, llvm::APFloat::rmTowardZero, &ignored); // If the resulting value is different, this was a narrowing conversion. 
- if (*IntConstantValue != ConvertedValue) { - ConstantValue = APValue(*IntConstantValue); + if (IntConstantValue != ConvertedValue) { + ConstantValue = APValue(IntConstantValue); ConstantType = Initializer->getType(); return NK_Constant_Narrowing; } @@ -430,18 +430,17 @@ NarrowingKind StandardConversionSequence::getNarrowingKind( (FromWidth == ToWidth && FromSigned != ToSigned) || (FromSigned && !ToSigned)) { // Not all values of FromType can be represented in ToType. + llvm::APSInt InitializerValue; const Expr *Initializer = IgnoreNarrowingConversion(Ctx, Converted); // If it's value-dependent, we can't tell whether it's narrowing. if (Initializer->isValueDependent()) return NK_Dependent_Narrowing; - Optional OptInitializerValue; - if (!(OptInitializerValue = Initializer->getIntegerConstantExpr(Ctx))) { + if (!Initializer->isIntegerConstantExpr(InitializerValue, Ctx)) { // Such conversions on variables are always narrowing. return NK_Variable_Narrowing; } - llvm::APSInt &InitializerValue = *OptInitializerValue; bool Narrowing = false; if (FromWidth < ToWidth) { // Negative -> unsigned is narrowing. Otherwise, more bits is never @@ -2184,22 +2183,21 @@ bool Sema::IsIntegralPromotion(Expr *From, QualType FromType, QualType ToType) { // compatibility. if (From) { if (FieldDecl *MemberDecl = From->getSourceBitField()) { - Optional BitWidth; + llvm::APSInt BitWidth; if (FromType->isIntegralType(Context) && - (BitWidth = - MemberDecl->getBitWidth()->getIntegerConstantExpr(Context))) { - llvm::APSInt ToSize(BitWidth->getBitWidth(), BitWidth->isUnsigned()); + MemberDecl->getBitWidth()->isIntegerConstantExpr(BitWidth, Context)) { + llvm::APSInt ToSize(BitWidth.getBitWidth(), BitWidth.isUnsigned()); ToSize = Context.getTypeSize(ToType); // Are we promoting to an int from a bitfield that fits in an int? 
- if (*BitWidth < ToSize || - (FromType->isSignedIntegerType() && *BitWidth <= ToSize)) { + if (BitWidth < ToSize || + (FromType->isSignedIntegerType() && BitWidth <= ToSize)) { return To->getKind() == BuiltinType::Int; } // Are we promoting to an unsigned int from an unsigned bitfield // that fits into an unsigned int? - if (FromType->isUnsignedIntegerType() && *BitWidth <= ToSize) { + if (FromType->isUnsignedIntegerType() && BitWidth <= ToSize) { return To->getKind() == BuiltinType::UInt; } diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index c7b97ec4d975a..e9d3c755eb232 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -335,15 +335,15 @@ static Attr *handleOpenCLUnrollHint(Sema &S, Stmt *St, const ParsedAttr &A, if (NumArgs == 1) { Expr *E = A.getArgAsExpr(0); - Optional ArgVal; + llvm::APSInt ArgVal(32); - if (!(ArgVal = E->getIntegerConstantExpr(S.Context))) { + if (!E->isIntegerConstantExpr(ArgVal, S.Context)) { S.Diag(A.getLoc(), diag::err_attribute_argument_type) << A << AANT_ArgumentIntegerConstant << E->getSourceRange(); return nullptr; } - int Val = ArgVal->getSExtValue(); + int Val = ArgVal.getSExtValue(); if (Val <= 0) { S.Diag(A.getRange().getBegin(), diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 13426cbf2db40..b8f7f1a581590 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -2476,8 +2476,8 @@ QualType Sema::BuildVectorType(QualType CurType, Expr *SizeExpr, return Context.getDependentVectorType(CurType, SizeExpr, AttrLoc, VectorType::GenericVector); - Optional VecSize = SizeExpr->getIntegerConstantExpr(Context); - if (!VecSize) { + llvm::APSInt VecSize(32); + if (!SizeExpr->isIntegerConstantExpr(VecSize, Context)) { Diag(AttrLoc, diag::err_attribute_argument_type) << "vector_size" << AANT_ArgumentIntegerConstant << SizeExpr->getSourceRange(); @@ -2489,13 +2489,13 @@ QualType Sema::BuildVectorType(QualType CurType, Expr 
*SizeExpr, VectorType::GenericVector); // vecSize is specified in bytes - convert to bits. - if (!VecSize->isIntN(61)) { + if (!VecSize.isIntN(61)) { // Bit size will overflow uint64. Diag(AttrLoc, diag::err_attribute_size_too_large) << SizeExpr->getSourceRange() << "vector"; return QualType(); } - uint64_t VectorSizeBits = VecSize->getZExtValue() * 8; + uint64_t VectorSizeBits = VecSize.getZExtValue() * 8; unsigned TypeSize = static_cast(Context.getTypeSize(CurType)); if (VectorSizeBits == 0) { @@ -2540,8 +2540,8 @@ QualType Sema::BuildExtVectorType(QualType T, Expr *ArraySize, } if (!ArraySize->isTypeDependent() && !ArraySize->isValueDependent()) { - Optional vecSize = ArraySize->getIntegerConstantExpr(Context); - if (!vecSize) { + llvm::APSInt vecSize(32); + if (!ArraySize->isIntegerConstantExpr(vecSize, Context)) { Diag(AttrLoc, diag::err_attribute_argument_type) << "ext_vector_type" << AANT_ArgumentIntegerConstant << ArraySize->getSourceRange(); @@ -2555,7 +2555,7 @@ QualType Sema::BuildExtVectorType(QualType T, Expr *ArraySize, } // Unlike gcc's vector_size attribute, the size is specified as the // number of elements, not the number of bytes. - unsigned vectorSize = static_cast(vecSize->getZExtValue()); + unsigned vectorSize = static_cast(vecSize.getZExtValue()); if (vectorSize == 0) { Diag(AttrLoc, diag::err_attribute_zero_size) @@ -6254,15 +6254,13 @@ static bool BuildAddressSpaceIndex(Sema &S, LangAS &ASIdx, const Expr *AddrSpace, SourceLocation AttrLoc) { if (!AddrSpace->isValueDependent()) { - Optional OptAddrSpace = - AddrSpace->getIntegerConstantExpr(S.Context); - if (!OptAddrSpace) { + llvm::APSInt addrSpace(32); + if (!AddrSpace->isIntegerConstantExpr(addrSpace, S.Context)) { S.Diag(AttrLoc, diag::err_attribute_argument_type) << "'address_space'" << AANT_ArgumentIntegerConstant << AddrSpace->getSourceRange(); return false; } - llvm::APSInt &addrSpace = *OptAddrSpace; // Bounds checking. 
if (addrSpace.isSigned()) { @@ -7714,9 +7712,9 @@ static void HandleNeonVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr, } // The number of elements must be an ICE. Expr *numEltsExpr = static_cast(Attr.getArgAsExpr(0)); - Optional numEltsInt; + llvm::APSInt numEltsInt(32); if (numEltsExpr->isTypeDependent() || numEltsExpr->isValueDependent() || - !(numEltsInt = numEltsExpr->getIntegerConstantExpr(S.Context))) { + !numEltsExpr->isIntegerConstantExpr(numEltsInt, S.Context)) { S.Diag(Attr.getLoc(), diag::err_attribute_argument_type) << Attr << AANT_ArgumentIntegerConstant << numEltsExpr->getSourceRange(); @@ -7732,7 +7730,7 @@ static void HandleNeonVectorTypeAttr(QualType &CurType, const ParsedAttr &Attr, // The total size of the vector must be 64 or 128 bits. unsigned typeSize = static_cast(S.Context.getTypeSize(CurType)); - unsigned numElts = static_cast(numEltsInt->getZExtValue()); + unsigned numElts = static_cast(numEltsInt.getZExtValue()); unsigned vecSize = typeSize * numElts; if (vecSize != 64 && vecSize != 128) { S.Diag(Attr.getLoc(), diag::err_attribute_bad_neon_vector_size) << CurType; From 78443666bc18a6957d279a0f58319c8a3e57771a Mon Sep 17 00:00:00 2001 From: Atmn Patel Date: Sun, 12 Jul 2020 22:19:40 -0500 Subject: [PATCH 064/771] [OpenMP] Add firstprivate as a default data-sharing attribute to clang This implements the default(firstprivate) clause as defined in OpenMP Technical Report 8 (2.22.4). 
Reviewed By: jdoerfert, ABataev Differential Revision: https://reviews.llvm.org/D75591 --- .../checks/openmp-use-default-none.rst | 9 + .../checkers/openmp-use-default-none.cpp | 52 +- clang/docs/LibASTMatchersReference.html | 19 +- clang/include/clang/ASTMatchers/ASTMatchers.h | 24 +- .../clang/Basic/DiagnosticParseKinds.td | 2 + clang/lib/ASTMatchers/Dynamic/Registry.cpp | 1 + clang/lib/Parse/ParseOpenMP.cpp | 12 +- clang/lib/Sema/SemaOpenMP.cpp | 76 +- ...stribute_parallel_for_default_messages.cpp | 31 +- ...ute_parallel_for_simd_default_messages.cpp | 30 +- clang/test/OpenMP/driver.c | 1 + .../test/OpenMP/parallel_default_messages.cpp | 26 +- .../OpenMP/parallel_for_default_messages.cpp | 23 +- .../parallel_for_simd_default_messages.cpp | 23 +- clang/test/OpenMP/parallel_master_codegen.cpp | 156 ++ .../parallel_master_default_messages.cpp | 24 +- .../parallel_sections_default_messages.cpp | 6 +- .../target_parallel_default_messages.cpp | 24 +- .../target_parallel_for_default_messages.cpp | 23 +- ...get_parallel_for_simd_default_messages.cpp | 23 +- .../OpenMP/target_teams_default_messages.cpp | 24 +- ...rget_teams_distribute_default_messages.cpp | 23 +- ...stribute_parallel_for_default_messages.cpp | 23 +- ...ute_parallel_for_simd_default_messages.cpp | 23 +- clang/test/OpenMP/task_default_messages.cpp | 23 +- clang/test/OpenMP/task_messages.cpp | 16 + clang/test/OpenMP/teams_default_messages.cpp | 24 +- .../teams_distribute_default_messages.cpp | 24 +- ...stribute_parallel_for_default_messages.cpp | 24 +- ...ute_parallel_for_simd_default_messages.cpp | 23 +- ...teams_distribute_simd_default_messages.cpp | 25 +- .../ASTMatchers/ASTMatchersNarrowingTest.cpp | 1749 ++++++++--------- .../ASTMatchers/ASTMatchersNodeTest.cpp | 825 ++++---- clang/unittests/ASTMatchers/ASTMatchersTest.h | 77 +- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 1 + 35 files changed, 2030 insertions(+), 1459 deletions(-) diff --git 
a/clang-tools-extra/docs/clang-tidy/checks/openmp-use-default-none.rst b/clang-tools-extra/docs/clang-tidy/checks/openmp-use-default-none.rst index 4223a10bd6e9b..77114100ba1cb 100644 --- a/clang-tools-extra/docs/clang-tidy/checks/openmp-use-default-none.rst +++ b/clang-tools-extra/docs/clang-tidy/checks/openmp-use-default-none.rst @@ -51,3 +51,12 @@ Example // WARNING: OpenMP directive ``parallel`` specifies ``default(shared)`` // clause. Consider using ``default(none)`` clause instead. } + + // ``parallel`` directive can have ``default`` clause, and said clause is + // specified, but with ``firstprivate`` kind, which is not ``none``, diagnose. + void p0_3() { + #pragma omp parallel default(firstprivate) + ; + // WARNING: OpenMP directive ``parallel`` specifies ``default(firstprivate)`` + // clause. Consider using ``default(none)`` clause instead. + } diff --git a/clang-tools-extra/test/clang-tidy/checkers/openmp-use-default-none.cpp b/clang-tools-extra/test/clang-tidy/checkers/openmp-use-default-none.cpp index 35d2d17b1e0e8..d1d3b0e441f3f 100644 --- a/clang-tools-extra/test/clang-tidy/checkers/openmp-use-default-none.cpp +++ b/clang-tools-extra/test/clang-tidy/checkers/openmp-use-default-none.cpp @@ -1,5 +1,5 @@ -// RUN: %check_clang_tidy %s openmp-use-default-none %t -- -- -fopenmp=libomp -fopenmp-version=40 -// RUN: %check_clang_tidy -std=c11 %s openmp-use-default-none %t -- -- -x c -fopenmp=libomp -fopenmp-version=40 +// RUN: %check_clang_tidy %s openmp-use-default-none %t -- -- -fopenmp=libomp -fopenmp-version=51 +// RUN: %check_clang_tidy -std=c11 %s openmp-use-default-none %t -- -- -x c -fopenmp=libomp -fopenmp-version=51 //----------------------------------------------------------------------------// // Null cases. 
@@ -42,6 +42,15 @@ void p0_2() { // CHECK-NOTES: :[[@LINE-3]]:22: note: existing 'default' clause specified here } +// 'parallel' directive can have 'default' clause, and said clause specified, +// but with 'firstprivate' kind, which is not 'none', diagnose. +void p0_3() { +#pragma omp parallel default(firstprivate) + ; + // CHECK-NOTES: :[[@LINE-2]]:1: warning: OpenMP directive 'parallel' specifies 'default(firstprivate)' clause, consider using 'default(none)' clause instead + // CHECK-NOTES: :[[@LINE-3]]:22: note: existing 'default' clause specified here +} + // 'task' directive. // 'task' directive can have 'default' clause, but said clause is not @@ -68,6 +77,15 @@ void p1_2() { // CHECK-NOTES: :[[@LINE-3]]:18: note: existing 'default' clause specified here } +// 'task' directive can have 'default' clause, and said clause specified, +// but with 'firstprivate' kind, which is not 'none', diagnose. +void p1_3() { +#pragma omp task default(firstprivate) + ; + // CHECK-NOTES: :[[@LINE-2]]:1: warning: OpenMP directive 'task' specifies 'default(firstprivate)' clause, consider using 'default(none)' clause instead + // CHECK-NOTES: :[[@LINE-3]]:18: note: existing 'default' clause specified here +} + // 'teams' directive. (has to be inside of 'target' directive) // 'teams' directive can have 'default' clause, but said clause is not @@ -97,6 +115,16 @@ void p2_2() { // CHECK-NOTES: :[[@LINE-3]]:19: note: existing 'default' clause specified here } +// 'teams' directive can have 'default' clause, and said clause specified, +// but with 'firstprivate' kind, which is not 'none', diagnose. +void p2_3() { +#pragma omp target +#pragma omp teams default(firstprivate) + ; + // CHECK-NOTES: :[[@LINE-2]]:1: warning: OpenMP directive 'teams' specifies 'default(firstprivate)' clause, consider using 'default(none)' clause instead + // CHECK-NOTES: :[[@LINE-3]]:19: note: existing 'default' clause specified here +} + // 'taskloop' directive. 
// 'taskloop' directive can have 'default' clause, but said clause is not @@ -126,6 +154,16 @@ void p3_2(const int a) { // CHECK-NOTES: :[[@LINE-4]]:22: note: existing 'default' clause specified here } +// 'taskloop' directive can have 'default' clause, and said clause specified, +// but with 'firstprivate' kind, which is not 'none', diagnose. +void p3_3(const int a) { +#pragma omp taskloop default(firstprivate) + for (int b = 0; b < a; b++) + ; + // CHECK-NOTES: :[[@LINE-3]]:1: warning: OpenMP directive 'taskloop' specifies 'default(firstprivate)' clause, consider using 'default(none)' clause instead + // CHECK-NOTES: :[[@LINE-4]]:22: note: existing 'default' clause specified here +} + //----------------------------------------------------------------------------// // Combined directives. // Let's not test every single possible permutation/combination of directives, @@ -158,3 +196,13 @@ void p4_2(const int a) { // CHECK-NOTES: :[[@LINE-3]]:1: warning: OpenMP directive 'parallel for' specifies 'default(shared)' clause, consider using 'default(none)' clause instead // CHECK-NOTES: :[[@LINE-4]]:26: note: existing 'default' clause specified here } + +// 'parallel' directive can have 'default' clause, and said clause specified, +// but with 'firstprivate' kind, which is not 'none', diagnose. +void p4_3(const int a) { +#pragma omp parallel for default(firstprivate) + for (int b = 0; b < a; b++) + ; + // CHECK-NOTES: :[[@LINE-3]]:1: warning: OpenMP directive 'parallel for' specifies 'default(firstprivate)' clause, consider using 'default(none)' clause instead + // CHECK-NOTES: :[[@LINE-4]]:26: note: existing 'default' clause specified here +} diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html index 2256cbf718698..60ff6ffe60567 100644 --- a/clang/docs/LibASTMatchersReference.html +++ b/clang/docs/LibASTMatchersReference.html @@ -676,9 +676,10 @@

Node Matchers

#pragma omp parallel default(none) #pragma omp parallel default(shared) + #pragma omp parallel default(firstprivate) #pragma omp parallel -``ompDefaultClause()`` matches ``default(none)`` and ``default(shared)``. +``ompDefaultClause()`` matches ``default(none)``, ``default(shared)``, and ``default(firstprivate)``. @@ -3783,6 +3784,7 @@

Narrowing Matchers

#pragma omp parallel #pragma omp parallel default(none) #pragma omp parallel default(shared) + #pragma omp parallel default(firstprivate) ``ompDefaultClause(isNoneKind())`` matches only ``default(none)``. @@ -3796,11 +3798,26 @@

Narrowing Matchers

#pragma omp parallel #pragma omp parallel default(none) #pragma omp parallel default(shared) + #pragma omp parallel default(firstprivate) ``ompDefaultClause(isSharedKind())`` matches only ``default(shared)``. +Matcher<OMPDefaultClause>isSharedKind +
Matches if the OpenMP ``default`` clause has ``firstprivate`` kind specified.
+
+Given
+
+  #pragma omp parallel
+  #pragma omp parallel default(none)
+  #pragma omp parallel default(shared)
+  #pragma omp parallel default(firstprivate)
+
+``ompDefaultClause(isFirstPrivateKind())`` matches only ``default(firstprivate)``.
+
+ + Matcher<OMPExecutableDirective>isAllowedToContainClauseKindOpenMPClauseKind CKind
Matches if the OpenMP directive is allowed to contain the specified OpenMP
 clause kind.
diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h
index f16fb876cdd38..643419743a119 100644
--- a/clang/include/clang/ASTMatchers/ASTMatchers.h
+++ b/clang/include/clang/ASTMatchers/ASTMatchers.h
@@ -7190,10 +7190,12 @@ AST_MATCHER_P(OMPExecutableDirective, hasAnyClause,
 /// \code
 ///   #pragma omp parallel default(none)
 ///   #pragma omp parallel default(shared)
+///   #pragma omp parallel default(firstprivate)
 ///   #pragma omp parallel
 /// \endcode
 ///
-/// ``ompDefaultClause()`` matches ``default(none)`` and ``default(shared)``.
+/// ``ompDefaultClause()`` matches ``default(none)``, ``default(shared)``, and
+/// ``default(firstprivate)``
 extern const internal::VariadicDynCastAllOfMatcher<Stmt, OMPDefaultClause>
     ompDefaultClause;
 
@@ -7205,6 +7207,7 @@ extern const internal::VariadicDynCastAllOfMatcher
 ///   #pragma omp parallel
 ///   #pragma omp parallel default(none)
 ///   #pragma omp parallel default(shared)
+///   #pragma omp parallel default(firstprivate)
 /// \endcode
 ///
 /// ``ompDefaultClause(isNoneKind())`` matches only ``default(none)``.
@@ -7220,6 +7223,7 @@ AST_MATCHER(OMPDefaultClause, isNoneKind) {
 ///   #pragma omp parallel
 ///   #pragma omp parallel default(none)
 ///   #pragma omp parallel default(shared)
+///   #pragma omp parallel default(firstprivate)
 /// \endcode
 ///
 /// ``ompDefaultClause(isSharedKind())`` matches only ``default(shared)``.
@@ -7227,6 +7231,24 @@ AST_MATCHER(OMPDefaultClause, isSharedKind) {
   return Node.getDefaultKind() == llvm::omp::OMP_DEFAULT_shared;
 }
 
+/// Matches if the OpenMP ``default`` clause has ``firstprivate`` kind
+/// specified.
+///
+/// Given
+///
+/// \code
+///   #pragma omp parallel
+///   #pragma omp parallel default(none)
+///   #pragma omp parallel default(shared)
+///   #pragma omp parallel default(firstprivate)
+/// \endcode
+///
+/// ``ompDefaultClause(isFirstPrivateKind())`` matches only
+/// ``default(firstprivate)``.
+AST_MATCHER(OMPDefaultClause, isFirstPrivateKind) {
+  return Node.getDefaultKind() == llvm::omp::OMP_DEFAULT_firstprivate;
+}
+
 /// Matches if the OpenMP directive is allowed to contain the specified OpenMP
 /// clause kind.
 ///
diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td
index f5b32a6ba5fa9..1038a4119d4cb 100644
--- a/clang/include/clang/Basic/DiagnosticParseKinds.td
+++ b/clang/include/clang/Basic/DiagnosticParseKinds.td
@@ -1334,6 +1334,8 @@ def warn_omp_more_one_device_type_clause
       InGroup;
 def err_omp_variant_ctx_second_match_extension : Error<
   "only a single match extension allowed per OpenMP context selector">;
+def err_omp_invalid_dsa: Error<
+  "data-sharing attribute '%0' in '%1' clause requires OpenMP version %2 or above">;
 
 // Pragma loop support.
 def err_pragma_loop_missing_argument : Error<
diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
index a0a65092a92b4..ec2215804c098 100644
--- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp
+++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp
@@ -389,6 +389,7 @@ RegistryMaps::RegistryMaps() {
   REGISTER_MATCHER(isExpr);
   REGISTER_MATCHER(isExternC);
   REGISTER_MATCHER(isFinal);
+  REGISTER_MATCHER(isFirstPrivateKind);
   REGISTER_MATCHER(isImplicit);
   REGISTER_MATCHER(isInStdNamespace);
   REGISTER_MATCHER(isInTemplateInstantiation);
diff --git a/clang/lib/Parse/ParseOpenMP.cpp b/clang/lib/Parse/ParseOpenMP.cpp
index afcef30438434..5223755c8fdf1 100644
--- a/clang/lib/Parse/ParseOpenMP.cpp
+++ b/clang/lib/Parse/ParseOpenMP.cpp
@@ -1441,7 +1441,7 @@ bool Parser::parseOMPDeclareVariantMatchClause(SourceLocation Loc,
 /// Parsing of simple OpenMP clauses like 'default' or 'proc_bind'.
 ///
 ///    default-clause:
-///         'default' '(' 'none' | 'shared' ')
+///         'default' '(' 'none' | 'shared'  | 'firstprivate' ')
 ///
 ///    proc_bind-clause:
 ///         'proc_bind' '(' 'master' | 'close' | 'spread' ')
@@ -2772,7 +2772,7 @@ OMPClause *Parser::ParseOpenMPSingleExprClause(OpenMPClauseKind Kind,
 /// Parsing of simple OpenMP clauses like 'default' or 'proc_bind'.
 ///
 ///    default-clause:
-///         'default' '(' 'none' | 'shared' ')'
+///         'default' '(' 'none' | 'shared' | 'firstprivate' ')'
 ///
 ///    proc_bind-clause:
 ///         'proc_bind' '(' 'master' | 'close' | 'spread' ')'
@@ -2785,6 +2785,14 @@ OMPClause *Parser::ParseOpenMPSimpleClause(OpenMPClauseKind Kind,
   llvm::Optional<SimpleClauseData> Val = parseOpenMPSimpleClause(*this, Kind);
   if (!Val || ParseOnly)
     return nullptr;
+  if (getLangOpts().OpenMP < 51 && Kind == OMPC_default &&
+      static_cast<DefaultKind>(Val.getValue().Type) ==
+          OMP_DEFAULT_firstprivate) {
+    Diag(Val.getValue().LOpen, diag::err_omp_invalid_dsa)
+        << getOpenMPClauseName(OMPC_firstprivate)
+        << getOpenMPClauseName(OMPC_default) << "5.1";
+    return nullptr;
+  }
   return Actions.ActOnOpenMPSimpleClause(
       Kind, Val.getValue().Type, Val.getValue().TypeLoc, Val.getValue().LOpen,
       Val.getValue().Loc, Val.getValue().RLoc);
diff --git a/clang/lib/Sema/SemaOpenMP.cpp b/clang/lib/Sema/SemaOpenMP.cpp
index b27abb54c170f..920463da40277 100644
--- a/clang/lib/Sema/SemaOpenMP.cpp
+++ b/clang/lib/Sema/SemaOpenMP.cpp
@@ -53,9 +53,10 @@ static const Expr *checkMapClauseExpressionBase(
 namespace {
 /// Default data sharing attributes, which can be applied to directive.
 enum DefaultDataSharingAttributes {
-  DSA_unspecified = 0, /// Data sharing attribute not specified.
-  DSA_none = 1 << 0,   /// Default data sharing attribute 'none'.
-  DSA_shared = 1 << 1, /// Default data sharing attribute 'shared'.
+  DSA_unspecified = 0,       /// Data sharing attribute not specified.
+  DSA_none = 1 << 0,         /// Default data sharing attribute 'none'.
+  DSA_shared = 1 << 1,       /// Default data sharing attribute 'shared'.
+  DSA_firstprivate = 1 << 2, /// Default data sharing attribute 'firstprivate'.
 };
 
 /// Stack for tracking declarations used in OpenMP directives and
@@ -684,6 +685,11 @@ class DSAStackTy {
     getTopOfStack().DefaultAttr = DSA_shared;
     getTopOfStack().DefaultAttrLoc = Loc;
   }
+  /// Set default data sharing attribute to firstprivate.
+  void setDefaultDSAFirstPrivate(SourceLocation Loc) {
+    getTopOfStack().DefaultAttr = DSA_firstprivate;
+    getTopOfStack().DefaultAttrLoc = Loc;
+  }
   /// Set default data mapping attribute to Modifier:Kind
   void setDefaultDMAAttr(OpenMPDefaultmapClauseModifier M,
                          OpenMPDefaultmapClauseKind Kind,
@@ -1183,6 +1189,15 @@ DSAStackTy::DSAVarData DSAStackTy::getDSA(const_iterator &Iter,
     return DVar;
   case DSA_none:
     return DVar;
+  case DSA_firstprivate:
+    if (VD->getStorageDuration() == SD_Static &&
+        VD->getDeclContext()->isFileContext()) {
+      DVar.CKind = OMPC_unknown;
+    } else {
+      DVar.CKind = OMPC_firstprivate;
+    }
+    DVar.ImplicitDSALoc = Iter->DefaultAttrLoc;
+    return DVar;
   case DSA_unspecified:
     // OpenMP [2.9.1.1, Data-sharing Attribute Rules for Variables Referenced
     // in a Construct, implicitly determined, p.2]
@@ -2058,7 +2073,13 @@ bool Sema::isOpenMPCapturedByRef(const ValueDecl *D, unsigned Level,
         // If the variable is artificial and must be captured by value - try to
         // capture by value.
         !(isa(D) && !D->hasAttr() &&
-          !cast(D)->getInit()->isGLValue());
+          !cast(D)->getInit()->isGLValue()) &&
+        // If the variable is implicitly firstprivate and scalar - capture by
+        // copy
+        !(DSAStack->getDefaultDSA() == DSA_firstprivate &&
+          !DSAStack->hasExplicitDSA(
+              D, [](OpenMPClauseKind K) { return K != OMPC_unknown; }, Level) &&
+          !DSAStack->isLoopControlVariable(D, Level).first);
   }
 
   // When passing data by copy, we need to make sure it fits the uintptr size
@@ -2185,10 +2206,13 @@ VarDecl *Sema::isOpenMPCapturedDecl(ValueDecl *D, bool CheckScopeInfo,
         DSAStack->isClauseParsingMode());
     // Global shared must not be captured.
     if (VD && !VD->hasLocalStorage() && DVarPrivate.CKind == OMPC_unknown &&
-        (DSAStack->getDefaultDSA() != DSA_none || DVarTop.CKind == OMPC_shared))
+        ((DSAStack->getDefaultDSA() != DSA_none &&
+          DSAStack->getDefaultDSA() != DSA_firstprivate) ||
+         DVarTop.CKind == OMPC_shared))
       return nullptr;
     if (DVarPrivate.CKind != OMPC_unknown ||
-        (VD && DSAStack->getDefaultDSA() == DSA_none))
+        (VD && (DSAStack->getDefaultDSA() == DSA_none ||
+                DSAStack->getDefaultDSA() == DSA_firstprivate)))
       return VD ? VD : cast<VarDecl>(DVarPrivate.PrivateCopy->getDecl());
   }
   return nullptr;
@@ -3333,10 +3357,19 @@ class DSAAttrChecker final : public StmtVisitor {
       // in the construct, and does not have a predetermined data-sharing
       // attribute, must have its data-sharing attribute explicitly determined
       // by being listed in a data-sharing attribute clause.
-      if (DVar.CKind == OMPC_unknown && Stack->getDefaultDSA() == DSA_none &&
+      if (DVar.CKind == OMPC_unknown &&
+          (Stack->getDefaultDSA() == DSA_none ||
+           Stack->getDefaultDSA() == DSA_firstprivate) &&
           isImplicitOrExplicitTaskingRegion(DKind) &&
           VarsWithInheritedDSA.count(VD) == 0) {
-        VarsWithInheritedDSA[VD] = E;
+        bool InheritedDSA = Stack->getDefaultDSA() == DSA_none;
+        if (!InheritedDSA && Stack->getDefaultDSA() == DSA_firstprivate) {
+          DSAStackTy::DSAVarData DVar =
+              Stack->getImplicitDSA(VD, /*FromParent=*/false);
+          InheritedDSA = DVar.CKind == OMPC_unknown;
+        }
+        if (InheritedDSA)
+          VarsWithInheritedDSA[VD] = E;
         return;
       }
 
@@ -3438,7 +3471,9 @@ class DSAAttrChecker final : public StmtVisitor {
 
       // Define implicit data-sharing attributes for task.
       DVar = Stack->getImplicitDSA(VD, /*FromParent=*/false);
-      if (isOpenMPTaskingDirective(DKind) && DVar.CKind != OMPC_shared &&
+      if (((isOpenMPTaskingDirective(DKind) && DVar.CKind != OMPC_shared) ||
+           (Stack->getDefaultDSA() == DSA_firstprivate &&
+            DVar.CKind == OMPC_firstprivate && !DVar.RefExpr)) &&
           !Stack->isLoopControlVariable(VD).first) {
         ImplicitFirstprivate.push_back(E);
         return;
@@ -5342,8 +5377,10 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
 
   ErrorFound = Res.isInvalid() || ErrorFound;
 
-  // Check variables in the clauses if default(none) was specified.
-  if (DSAStack->getDefaultDSA() == DSA_none) {
+  // Check variables in the clauses if default(none) or
+  // default(firstprivate) was specified.
+  if (DSAStack->getDefaultDSA() == DSA_none ||
+      DSAStack->getDefaultDSA() == DSA_firstprivate) {
     DSAAttrChecker DSAChecker(DSAStack, *this, nullptr);
     for (OMPClause *C : Clauses) {
       switch (C->getClauseKind()) {
@@ -5454,7 +5491,8 @@ StmtResult Sema::ActOnOpenMPExecutableDirective(
     if (P.getFirst()->isImplicit() || isa(P.getFirst()))
       continue;
     ErrorFound = true;
-    if (DSAStack->getDefaultDSA() == DSA_none) {
+    if (DSAStack->getDefaultDSA() == DSA_none ||
+        DSAStack->getDefaultDSA() == DSA_firstprivate) {
       Diag(P.second->getExprLoc(), diag::err_omp_no_dsa_for_variable)
           << P.first << P.second->getSourceRange();
       Diag(DSAStack->getDefaultDSALocation(), diag::note_omp_default_dsa_none);
@@ -12932,10 +12970,20 @@ OMPClause *Sema::ActOnOpenMPDefaultClause(DefaultKind Kind,
         << getOpenMPClauseName(OMPC_default);
     return nullptr;
   }
-  if (Kind == OMP_DEFAULT_none)
+
+  switch (Kind) {
+  case OMP_DEFAULT_none:
     DSAStack->setDefaultDSANone(KindKwLoc);
-  else if (Kind == OMP_DEFAULT_shared)
+    break;
+  case OMP_DEFAULT_shared:
     DSAStack->setDefaultDSAShared(KindKwLoc);
+    break;
+  case OMP_DEFAULT_firstprivate:
+    DSAStack->setDefaultDSAFirstPrivate(KindKwLoc);
+    break;
+  default:
+    llvm_unreachable("DSA unexpected in OpenMP default clause");
+  }
 
   return new (Context)
       OMPDefaultClause(Kind, KindKwLoc, StartLoc, LParenLoc, EndLoc);
diff --git a/clang/test/OpenMP/distribute_parallel_for_default_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_default_messages.cpp
index 0629ba096d0c2..67e4615ae8c01 100644
--- a/clang/test/OpenMP/distribute_parallel_for_default_messages.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_default_messages.cpp
@@ -2,8 +2,17 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 template 
 T tmain(T argc) {
   int i;
@@ -14,12 +23,12 @@ T tmain(T argc) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp distribute parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -34,7 +43,7 @@ T tmain(T argc) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -62,12 +71,12 @@ int main(int argc, char **argv) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp distribute parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -82,7 +91,7 @@ int main(int argc, char **argv) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -98,5 +107,15 @@ int main(int argc, char **argv) {
   for (i = 0; i < argc; ++i) // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
     foo();
 
+#ifdef OMP51
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (i = 0; i < argc; ++i) {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return (tmain(argc) + tmain(argv[0][0])); // expected-note {{in instantiation of function template specialization 'tmain' requested here}} expected-note {{in instantiation of function template specialization 'tmain' requested here}}
 }
diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp
index b9c5546ec5d95..9aab00f16c48f 100644
--- a/clang/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp
+++ b/clang/test/OpenMP/distribute_parallel_for_simd_default_messages.cpp
@@ -2,8 +2,17 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp -ferror-limit 100 -o - %s -Wuninitialized -DOMP51 -fopenmp-version=51
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized -DOMP51 -fopenmp-version=51
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 template 
 T tmain(T argc) {
   int i;
@@ -14,12 +23,12 @@ T tmain(T argc) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp distribute parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -34,7 +43,7 @@ T tmain(T argc) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -62,12 +71,12 @@ int main(int argc, char **argv) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp distribute parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -82,7 +91,7 @@ int main(int argc, char **argv) {
     foo();
 #pragma omp target
 #pragma omp teams
-#pragma omp distribute parallel for simd default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target
@@ -90,6 +99,15 @@ int main(int argc, char **argv) {
 #pragma omp distribute parallel for simd default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (i = 0; i < argc; ++i)  // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
     foo();
+#ifdef OpenMP51
+#pragma omp target
+#pragma omp teams
+#pragma omp distribute parallel for simd default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (i = 0; i < argc; ++i) {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
 
 #pragma omp parallel default(none) // expected-note 2 {{explicit data sharing attribute requested here}}
 #pragma omp target
diff --git a/clang/test/OpenMP/driver.c b/clang/test/OpenMP/driver.c
index fa5bd1a8b5f8d..047478256f9f5 100644
--- a/clang/test/OpenMP/driver.c
+++ b/clang/test/OpenMP/driver.c
@@ -47,6 +47,7 @@
 // RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=31 | FileCheck --check-prefix=CHECK-VERSION %s
 // RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=40 | FileCheck --check-prefix=CHECK-VERSION %s
 // RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=45 | FileCheck --check-prefix=CHECK-VERSION %s
+// RUN: %clang %s -c -E -dM -fopenmp-simd -fopenmp-version=51 | FileCheck --check-prefix=CHECK-VERSION %s
 
 // CHECK-VERSION-NOT: #define _OPENMP
 
diff --git a/clang/test/OpenMP/parallel_default_messages.cpp b/clang/test/OpenMP/parallel_default_messages.cpp
index 6b8ad67051850..b098c43852a85 100644
--- a/clang/test/OpenMP/parallel_default_messages.cpp
+++ b/clang/test/OpenMP/parallel_default_messages.cpp
@@ -4,18 +4,25 @@
 // RUN: %clang_cc1 -verify=expected,ge40 -fopenmp-version=40 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-version=31 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
 // RUN: %clang_cc1 -verify -fopenmp-version=30 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,ge40 -fopenmp-version=51 -fopenmp -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,ge40 -fopenmp-version=51 -fopenmp-simd -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized
 
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   const int c = 0;
 
   #pragma omp parallel default // expected-error {{expected '(' after 'default'}}
-  #pragma omp parallel default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
-  #pragma omp parallel default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
-  #pragma omp parallel default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
-  #pragma omp parallel default (shared), default(shared) // expected-error {{directive '#pragma omp parallel' cannot contain more than one 'default' clause}}
-  #pragma omp parallel default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel default(  // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp parallel default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
+#pragma omp parallel default(none                     // expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp parallel default(shared), default(shared) // expected-error {{directive '#pragma omp parallel' cannot contain more than one 'default' clause}}
+#pragma omp parallel default(x)                       // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
 
   #pragma omp parallel default(none) // expected-note {{explicit data sharing attribute requested here}}
@@ -27,5 +34,14 @@ int main(int argc, char **argv) {
 
   #pragma omp parallel default(none) // ge40-note {{explicit data sharing attribute requested here}}
   (void)c; // ge40-error {{variable 'c' must have explicitly specified data sharing attributes}}
+
+#ifdef OMP51
+#pragma omp parallel default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/parallel_for_default_messages.cpp b/clang/test/OpenMP/parallel_for_default_messages.cpp
index b02fa8803a3b3..c64b76948c018 100644
--- a/clang/test/OpenMP/parallel_for_default_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   int i;
 #pragma omp parallel for default // expected-error {{expected '(' after 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp parallel for default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp parallel for default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp parallel for default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{explicit data sharing attribute requested here}}
@@ -21,7 +30,7 @@ int main(int argc, char **argv) {
 #pragma omp parallel for default(shared), default(shared) // expected-error {{directive '#pragma omp parallel for' cannot contain more than one 'default' clause}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp parallel for default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 
@@ -34,5 +43,13 @@ int main(int argc, char **argv) {
   for (i = 0; i < argc; ++i) // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
     foo();
 
+#ifdef OMP51
+#pragma omp parallel for default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (i = 0; i < argc; ++i) {
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/parallel_for_simd_default_messages.cpp
index 570ee14bbc84b..6368d280de5db 100644
--- a/clang/test/OpenMP/parallel_for_simd_default_messages.cpp
+++ b/clang/test/OpenMP/parallel_for_simd_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   int i;
 #pragma omp parallel for simd default // expected-error {{expected '(' after 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp parallel for simd default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp parallel for simd default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp parallel for simd default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{explicit data sharing attribute requested here}}
@@ -21,7 +30,7 @@ int main(int argc, char **argv) {
 #pragma omp parallel for simd default(shared), default(shared) // expected-error {{directive '#pragma omp parallel for simd' cannot contain more than one 'default' clause}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp parallel for simd default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 
@@ -34,5 +43,13 @@ int main(int argc, char **argv) {
   for (i = 0; i < argc; ++i) // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}} expected-error {{variable 'i' must have explicitly specified data sharing attributes}}
     foo();
 
+#ifdef OMP51
+#pragma omp parallel for default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (i = 0; i < argc; ++i) {
+    x++; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    y++; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/parallel_master_codegen.cpp b/clang/test/OpenMP/parallel_master_codegen.cpp
index 9ffa941314b98..82e18c80f103e 100644
--- a/clang/test/OpenMP/parallel_master_codegen.cpp
+++ b/clang/test/OpenMP/parallel_master_codegen.cpp
@@ -118,6 +118,162 @@ void parallel_master_private() {
 
 #endif
 
+#ifdef CK31
+///==========================================================================///
+// RUN: %clang_cc1 -DCK31 -fopenmp-version=51 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix CK31
+// RUN: %clang_cc1 -DCK31 -fopenmp-version=51 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK31 -fopenmp-version=51 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK31
+
+// RUN: %clang_cc1 -DCK31 -fopenmp-version=51 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK31 -fopenmp-version=51 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK31 -fopenmp-version=51 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// CK31-DAG:   %struct.ident_t = type { i32, i32, i32, i32, i8* }
+// CK31-DAG:   [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+
+void parallel_master_default_firstprivate() {
+  int a;
+#pragma omp parallel master default(firstprivate)
+  a++;
+}
+
+// CK31-LABEL: define void @{{.+}}parallel_master{{.+}}
+// CK31:       [[A_VAL:%.+]] = alloca i32{{.+}}
+// CK31:       [[A_CASTED:%.+]] = alloca i64
+// CK31:       [[ZERO_VAL:%.+]] = load i32, i32* [[A_VAL]]
+// CK31:       [[CONV:%.+]] = bitcast i64* [[A_CASTED]] to i32*
+// CK31:       store i32 [[ZERO_VAL]], i32* [[CONV]]
+// CK31:       [[ONE_VAL:%.+]] = load i64, i64* [[A_CASTED]]
+// CK31:       call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @0, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[ONE_VAL]])
+// CK31:       ret void
+
+// CK31:       [[GLOBAL_TID_ADDR:%.+]] = alloca i32*
+// CK31:       [[BOUND_TID_ADDR:%.+]] = alloca i32*
+// CK31:       [[A_ADDR:%.+]] = alloca i64{{.+}}
+// CK31:       store i32* [[GLOBAL_TID:%.+]], i32** [[GLOBAL_TID_ADDR]]{{.+}}
+// CK31:       store i32* [[BOUND_TID:%.+]], i32** [[BOUND_TID_ADDR]]
+// CK31:       store i64 [[A_VAL]], i64* [[A_ADDR]]
+// CK31:       [[CONV]] = bitcast i64* [[A_ADDR]]
+// CK31:       [[ZERO_VAL]] = load i32*, i32** [[GLOBAL_TID_ADDR]]
+// CK31:       [[ONE_VAL]] = load i32, i32* [[ZERO_VAL]]
+// CK31:       [[TWO_VAL:%.+]] = call i32 @__kmpc_master(%struct.ident_t* @0, i32 [[ONE_VAL]])
+// CK31:       [[THREE:%.+]] = icmp ne i32 [[TWO_VAL]], 0
+// CK31:       br i1 %3, label [[OMP_IF_THEN:%.+]], label [[OMP_IF_END:%.+]]
+
+// CK31:       [[FOUR:%.+]] = load i32, i32* [[CONV:%.+]]
+// CK31:       [[INC:%.+]] = add nsw i32 [[FOUR]]
+// CK31:       store i32 [[INC]], i32* [[CONV]]
+// CK31:       call void @__kmpc_end_master(%struct.ident_t* @0, i32 [[ONE_VAL]])
+// CK31:       br label [[OMP_IF_END]]
+
+// CK31:       ret void
+
+#endif
+
+#ifdef CK32
+///==========================================================================///
+// RUN: %clang_cc1 -DCK32 -fopenmp-version=51 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix CK32
+// RUN: %clang_cc1 -DCK32 -fopenmp-version=51 -fopenmp -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK32 -fopenmp-version=51 -fopenmp -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s --check-prefix CK32
+
+// RUN: %clang_cc1 -DCK32 -fopenmp-version=51 -verify -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// RUN: %clang_cc1 -DCK32 -fopenmp-version=51 -fopenmp-simd -x c++ -std=c++11 -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -emit-pch -o %t %s
+// RUN: %clang_cc1 -DCK32 -fopenmp-version=51 -fopenmp-simd -x c++ -triple x86_64-unknown-unknown -fexceptions -fcxx-exceptions -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY0 %s
+// SIMD-ONLY0-NOT: {{__kmpc|__tgt}}
+
+// CK32-DAG:   %struct.ident_t = type { i32, i32, i32, i32, i8* }
+// CK32-DAG:   [[STR:@.+]] = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00"
+
+struct St {
+  int a, b;
+  static int y;
+  St() : a(0), b(0) {}
+  ~St() {}
+};
+int St::y = 0;
+
+void parallel_master_default_firstprivate() {
+  St a = St();
+  static int y = 0;
+#pragma omp parallel master default(firstprivate)
+  {
+    a.a += 1;
+    a.b += 1;
+    y++;
+    a.y++;
+  }
+}
+
+// CK32-LABEL: define {{.+}} @{{.+}}parallel_master_default_firstprivate{{.+}}
+// CK32: [[A_VAL:%.+]] = alloca %struct.St{{.+}}
+// CK32: [[Y_CASTED:%.+]] = alloca i64
+// CK32: call void @[[CTOR:.+]](%struct.St* [[A_VAL]])
+// CK32: [[ZERO:%.+]] = load i32, i32* @{{.+}}parallel_master_default_firstprivate{{.+}}
+// CK32: [[CONV:%.+]] = bitcast i64* [[Y_CASTED]] to i32*
+// CK32: store i32 [[ZERO]], i32* [[CONV]]
+// CK32: [[ONE:%.+]] = load i64, i64* [[Y_CASTED]]
+// CK32: call void {{.+}}@{{.+}} %struct.St* [[A_VAL]], i64 [[ONE]])
+// CK32: call void [[DTOR:@.+]](%struct.St* [[A_VAL]])
+
+// CK32: [[THIS_ADDR:%.+]] = alloca %struct.St*
+// CK32: store %struct.St* [[THIS:%.+]], %struct.St** [[THIS_ADDR]]
+// CK32: [[THIS_ONE:%.+]] = load %struct.St*, %struct.St** [[THIS_ADDR]]
+// CK32: call void [[CTOR_2:.+]](%struct.St* [[THIS_ONE]])
+// CK32: ret void
+
+// CK32: [[GLOBAL_TID_ADDR:%.+]] = alloca i32*
+// CK32: [[BOUND_TID_ADDR:%.+]] = alloca i32*
+// CK32: [[A_ADDR:%.+]] = alloca %struct.St
+// CK32: [[Y_ADDR:%.+]] = alloca i64
+// CK32: store i32* [[GLOBAL_TID:%.+]], i32** [[GLOBAL_TID_ADDR]]
+// CK32: store i32* %.bound_tid., i32** [[BOUND_TID_ADDR]]
+// CK32: store %struct.St* [[A_VAL]], %struct.St** [[A_ADDR]]{{.+}}
+// CK32: store i64 [[Y:%.+]], i64* [[Y_ADDR]]
+// CK32: [[ONE:%.+]] = load i32*, i32** [[GLOBAL_TID_ADDR]]
+// CK32: [[TWO:%.+]] = load i32, i32* [[ONE]]
+// CK32: [[THREE:%.+]] = call i32 @{{.+}} i32 [[TWO]])
+// CK32: [[FOUR:%.+]] = icmp ne i32 [[THREE]], 0
+// CK32: br i1 [[FOUR]], label [[IF_THEN:%.+]], label [[IF_END:%.+]]
+
+// CK32: [[A_1:%.+]] = getelementptr inbounds %struct.St, %struct.St* [[ZERO]], i32 0, i32 0
+// CK32: [[FIVE:%.+]] = load i32, i32* [[A_1]]
+// CK32: [[ADD:%.+]] = add nsw i32 [[FIVE]], 1
+// CK32: store i32 [[ADD]], i32* [[A_1]]
+// CK32: [[B:%.+]] = getelementptr inbounds %struct.St, %struct.St* [[ZERO]], i32 0, i32 1
+// CK32: [[SIX:%.+]] = load i32, i32* [[B]]
+// CK32: [[ADD_2:%.+]] = add nsw i32 [[SIX]], 1
+// CK32: store i32 [[ADD_2]], i32* [[B]]
+// CK32: [[SEVEN:%.+]] = load i32, i32* [[CONV]]
+// CK32: [[INC:%.+]] = add nsw i32 [[SEVEN]], 1
+// CK32: store i32 [[INC]], i32* [[CONV]]
+// CK32: [[EIGHT:%.+]] = load i32, i32* [[FUNC:@.+]]
+// CK32: [[INC_3:%.+]] = add nsw i32 [[EIGHT]], 1
+// CK32: store i32 [[INC_3]], i32* @{{.+}}
+// CK32: call void @{{.+}} i32 [[TWO]])
+// CK32: br label [[IF_END]]
+
+// CK32: [[DTOR]](%struct.St* [[THIS]])
+// CK32: [[THIS_ADDR]] = alloca %struct.St*
+// CK32: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]]
+// CK32: [[THIS_ONE]] = load %struct.St*, %struct.St** [[THIS_ADDR]]
+// CK32: call void @_ZN2StD2Ev(%struct.St* [[THIS_ONE]])
+
+// CK32: [[THIS_ADDR]] = alloca %struct.St*
+// CK32: store %struct.St* [[THIS]], %struct.St** [[THIS_ADDR]]
+// CK32: [[THIS_ONE]] = load %struct.St*, %struct.St** [[THIS_ADDR]]
+// CK32: [[A_VAL]] = getelementptr inbounds %struct.St, %struct.St* [[THIS_ONE]], i32 0, i32 0
+// CK32: store i32 0, i32* [[A_VAL]]
+// CK32: [[B_VAL:%.+]] = getelementptr inbounds %struct.St, %struct.St* [[THIS_ONE]], i32 0, i32 1
+// CK32: store i32 0, i32* [[B_VAL]]
+// CK32: ret void
+
+// CK32: [[THIS_ADDR:%.+]] = alloca %struct.St*
+// CK32: store %struct.St* %this, %struct.St** [[THIS_ADDR]]
+// CK32: [[THIS_ONE]] = load %struct.St*, %struct.St** [[THIS_ADDR]]
+
+#endif
+
 #ifdef CK4
 ///==========================================================================///
 // RUN: %clang_cc1 -DCK4 -verify -fopenmp -x c++ -triple x86_64-unknown-unknown -emit-llvm %s -fexceptions -fcxx-exceptions -o - | FileCheck %s --check-prefix CK4
diff --git a/clang/test/OpenMP/parallel_master_default_messages.cpp b/clang/test/OpenMP/parallel_master_default_messages.cpp
index 557cba5aa322a..39f78ea53ae16 100644
--- a/clang/test/OpenMP/parallel_master_default_messages.cpp
+++ b/clang/test/OpenMP/parallel_master_default_messages.cpp
@@ -2,20 +2,29 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
 #pragma omp parallel master default // expected-error {{expected '(' after 'default'}}
   {
-#pragma omp parallel master default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp parallel master default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
     {
-#pragma omp parallel master default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel master default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
       {
 #pragma omp parallel master default(none // expected-error {{expected ')'}} expected-note {{to match this '('}}
         {
 #pragma omp parallel master default(shared), default(shared) // expected-error {{directive '#pragma omp parallel master' cannot contain more than one 'default' clause}}
           {
-#pragma omp parallel master default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel master default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
             {
               foo();
             }
@@ -37,5 +46,14 @@ int main(int argc, char **argv) {
       ++argc;  // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
     }
   }
+
+#ifdef OMP51
+#pragma omp parallel master default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/parallel_sections_default_messages.cpp b/clang/test/OpenMP/parallel_sections_default_messages.cpp
index d6a10fe56b344..cfa95445fb536 100644
--- a/clang/test/OpenMP/parallel_sections_default_messages.cpp
+++ b/clang/test/OpenMP/parallel_sections_default_messages.cpp
@@ -7,15 +7,15 @@ void foo();
 int main(int argc, char **argv) {
 #pragma omp parallel sections default // expected-error {{expected '(' after 'default'}}
   {
-#pragma omp parallel sections default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp parallel sections default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
     {
-#pragma omp parallel sections default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel sections default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
       {
 #pragma omp parallel sections default(none // expected-error {{expected ')'}} expected-note {{to match this '('}}
         {
 #pragma omp parallel sections default(shared), default(shared) // expected-error {{directive '#pragma omp parallel sections' cannot contain more than one 'default' clause}}
           {
-#pragma omp parallel sections default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp parallel sections default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
             {
               foo();
             }
diff --git a/clang/test/OpenMP/target_parallel_default_messages.cpp b/clang/test/OpenMP/target_parallel_default_messages.cpp
index 0691cdf37e4eb..c8f68659438fe 100644
--- a/clang/test/OpenMP/target_parallel_default_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_default_messages.cpp
@@ -2,20 +2,29 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target parallel default // expected-error {{expected '(' after 'default'}}
   foo();
-  #pragma omp target parallel default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target parallel default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   foo();
-  #pragma omp target parallel default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target parallel default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
   #pragma omp target parallel default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
   foo();
   #pragma omp target parallel default (shared), default(shared) // expected-error {{directive '#pragma omp target parallel' cannot contain more than one 'default' clause}}
   foo();
-  #pragma omp target parallel default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target parallel default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
 
   #pragma omp target parallel default(none) // expected-note {{explicit data sharing attribute requested here}}
@@ -28,5 +37,14 @@ int main(int argc, char **argv) {
   #pragma omp target parallel default(none) // expected-note {{explicit data sharing attribute requested here}}
   #pragma omp parallel default(shared)
   ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
+
+#ifndef OMP51
+#pragma omp target parallel default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_parallel_for_default_messages.cpp b/clang/test/OpenMP/target_parallel_for_default_messages.cpp
index fc6ba43138d76..4a3aae68e0865 100644
--- a/clang/test/OpenMP/target_parallel_for_default_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_for_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -DOMP51 -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   int i;
 #pragma omp target parallel for default // expected-error {{expected '(' after 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp target parallel for default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp target parallel for default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target parallel for default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{explicit data sharing attribute requested here}}
@@ -21,7 +30,7 @@ int main(int argc, char **argv) {
 #pragma omp target parallel for default(shared), default(shared) // expected-error {{directive '#pragma omp target parallel for' cannot contain more than one 'default' clause}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp target parallel for default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 
@@ -34,5 +43,13 @@ int main(int argc, char **argv) {
   for (i = 0; i < argc; ++i) // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
     foo();
 
+#ifndef OMP51
+#pragma omp target parallel for default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  for (i = 0; i < argc; ++i) {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/target_parallel_for_simd_default_messages.cpp
index daa93b9c9050b..48489309ef037 100644
--- a/clang/test/OpenMP/target_parallel_for_simd_default_messages.cpp
+++ b/clang/test/OpenMP/target_parallel_for_simd_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   int i;
 #pragma omp target parallel for simd default // expected-error {{expected '(' after 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp target parallel for simd default( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp target parallel for simd default() // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 #pragma omp target parallel for simd default(none // expected-error {{expected ')'}} expected-note {{to match this '('}} expected-note {{explicit data sharing attribute requested here}}
@@ -21,7 +30,7 @@ int main(int argc, char **argv) {
 #pragma omp target parallel for simd default(shared), default(shared) // expected-error {{directive '#pragma omp target parallel for simd' cannot contain more than one 'default' clause}}
   for (i = 0; i < argc; ++i)
     foo();
-#pragma omp target parallel for simd default(x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (i = 0; i < argc; ++i)
     foo();
 
@@ -34,5 +43,13 @@ int main(int argc, char **argv) {
   for (i = 0; i < argc; ++i) // expected-error {{variable 'i' must have explicitly specified data sharing attributes}} expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
     foo();
 
+#ifndef OMP51
+#pragma omp target parallel for simd default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  for (int i = 0; i < argc; i++) {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_teams_default_messages.cpp b/clang/test/OpenMP/target_teams_default_messages.cpp
index 21fa8270ef6a7..85c417f8f9853 100644
--- a/clang/test/OpenMP/target_teams_default_messages.cpp
+++ b/clang/test/OpenMP/target_teams_default_messages.cpp
@@ -2,20 +2,29 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
 #pragma omp target teams default // expected-error {{expected '(' after 'default'}}
   foo();
-#pragma omp target teams default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target teams default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   foo();
-#pragma omp target teams default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
 #pragma omp target teams default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
   foo();
 #pragma omp target teams default (shared), default(shared) // expected-error {{directive '#pragma omp target teams' cannot contain more than one 'default' clause}}
   foo();
-#pragma omp target teams default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
 
 #pragma omp target teams default(none) // expected-note {{explicit data sharing attribute requested here}}
@@ -24,5 +33,14 @@ int main(int argc, char **argv) {
 #pragma omp target teams default(none) // expected-note {{explicit data sharing attribute requested here}}
 #pragma omp parallel default(shared)
   ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
+
+#ifndef OMP51
+#pragma omp target teams default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_teams_distribute_default_messages.cpp b/clang/test/OpenMP/target_teams_distribute_default_messages.cpp
index fd834e7cba32c..a490ad61385ff 100644
--- a/clang/test/OpenMP/target_teams_distribute_default_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_default_messages.cpp
@@ -2,24 +2,41 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=51 -DOMP51 %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-simd -fopenmp-version=51 -DOMP51 %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target teams distribute default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
-  #pragma omp target teams distribute default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target teams distribute default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
-  #pragma omp target teams distribute default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams distribute default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target teams distribute default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target teams distribute default (shared), default(shared) // expected-error {{directive '#pragma omp target teams distribute' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
-  #pragma omp target teams distribute default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams distribute default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
   #pragma omp target teams distribute default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#ifndef OMP51
+#pragma omp target teams distribute default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  for (int i = 0; i < 200; i++) {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp
index 00e0704a6ccac..2fe7931369618 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_default_messages.cpp
@@ -2,24 +2,41 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
 #pragma omp target teams distribute parallel for default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
- #pragma omp target teams distribute parallel for default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target teams distribute parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
-#pragma omp target teams distribute parallel for default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams distribute parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 #pragma omp target teams distribute parallel for default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
 #pragma omp target teams distribute parallel for default (shared), default(shared) // expected-error {{directive '#pragma omp target teams distribute parallel for' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
-#pragma omp target teams distribute parallel for default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams distribute parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
 #pragma omp target teams distribute parallel for default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#ifndef OMP51
+#pragma omp target teams distribute parallel for default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  for (int i = 0; i < 200; i++) {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp
index 7c46c964d2ec3..e5ff856222501 100644
--- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp
+++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_default_messages.cpp
@@ -2,16 +2,25 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
 #pragma omp target teams distribute parallel for simd default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
 
-#pragma omp target teams distribute parallel for simd default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp target teams distribute parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
 
-#pragma omp target teams distribute parallel for simd default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams distribute parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
 #pragma omp target teams distribute parallel for simd default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -20,11 +29,19 @@ int main(int argc, char **argv) {
 #pragma omp target teams distribute parallel for simd default (shared), default(shared) // expected-error {{directive '#pragma omp target teams distribute parallel for simd' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
 
-#pragma omp target teams distribute parallel for simd default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp target teams distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
 #pragma omp target teams distribute parallel for simd default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#ifndef OMP51
+#pragma omp target teams distribute parallel for simd default(firstprivate) // expected-error {{data-sharing attribute 'firstprivate' in 'default' clause requires OpenMP version 5.1 or above}}
+  for (int i = 0; i < argc; ++i) {
+    ++x;
+    ++y;
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/task_default_messages.cpp b/clang/test/OpenMP/task_default_messages.cpp
index 4826c253aa043..8b6809ee05d56 100644
--- a/clang/test/OpenMP/task_default_messages.cpp
+++ b/clang/test/OpenMP/task_default_messages.cpp
@@ -2,15 +2,24 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -ferror-limit 100 -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -ferror-limit 100 -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
 #pragma omp task default                          // expected-error {{expected '(' after 'default'}}
-#pragma omp task default(                         // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
-#pragma omp task default()                        // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp task default(                         // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp task default()                        // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
 #pragma omp task default(none                     // expected-error {{expected ')'}} expected-note {{to match this '('}}
 #pragma omp task default(shared), default(shared) // expected-error {{directive '#pragma omp task' cannot contain more than one 'default' clause}}
-#pragma omp task default(x)                       // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp task default(x)                       // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
 
 #pragma omp task default(none) // expected-note {{explicit data sharing attribute requested here}}
@@ -19,5 +28,13 @@ int main(int argc, char **argv) {
 #pragma omp task default(none) // expected-note {{explicit data sharing attribute requested here}}
 #pragma omp task default(shared)
   ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
+
+#ifdef OMP51
+#pragma omp task default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
   return 0;
 }
diff --git a/clang/test/OpenMP/task_messages.cpp b/clang/test/OpenMP/task_messages.cpp
index 8b3183e0bd93e..13cbfb6c45693 100644
--- a/clang/test/OpenMP/task_messages.cpp
+++ b/clang/test/OpenMP/task_messages.cpp
@@ -4,6 +4,9 @@
 // RUN: %clang_cc1 -verify=expected,omp45 -fopenmp-version=45 -fopenmp-simd -ferror-limit 200 -std=c++11 -o - %s -Wuninitialized
 // RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-version=50 -fopenmp-simd -ferror-limit 200 -std=c++11 -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-version=51 -DOMP51 -fopenmp -ferror-limit 100 -std=c++11 -o - %s -Wuninitialized
+// RUN: %clang_cc1 -verify=expected,omp50 -fopenmp-version=51 -DOMP51 -fopenmp-simd -ferror-limit 100 -std=c++11 -o - %s -Wuninitialized
+
 void xxx(int argc) {
   int x; // expected-note {{initialize the variable 'x' to silence this warning}}
 #pragma omp task
@@ -16,6 +19,10 @@ void foo() {
 }
 
 typedef unsigned long omp_event_handle_t;
+namespace {
+static int y = 0;
+}
+static int x = 0;
 
 #pragma omp task // expected-error {{unexpected OpenMP directive '#pragma omp task'}}
 
@@ -52,6 +59,15 @@ int foo() {
 #pragma omp task default(none) // expected-note 2 {{explicit data sharing attribute requested here}}
 #pragma omp task default(shared)
   ++a; // expected-error 2 {{variable 'a' must have explicitly specified data sharing attributes}}
+#ifdef OMP51
+#pragma omp task default(firstprivate) // expected-note 4 {{explicit data sharing attribute requested here}}
+#pragma omp task
+  {
+    ++x; // expected-error 2 {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error 2 {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
 #pragma omp task default(none) // expected-note 2 {{explicit data sharing attribute requested here}}
 #pragma omp task
   // expected-error@+1 {{calling a private constructor of class 'S'}}
diff --git a/clang/test/OpenMP/teams_default_messages.cpp b/clang/test/OpenMP/teams_default_messages.cpp
index a025050406000..b117ef4948a0f 100644
--- a/clang/test/OpenMP/teams_default_messages.cpp
+++ b/clang/test/OpenMP/teams_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd -o - %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp -o - %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd -o - %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target
   #pragma omp teams default // expected-error {{expected '(' after 'default'}}
   foo();
   #pragma omp target
-  #pragma omp teams default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp teams default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   foo();
   #pragma omp target
-  #pragma omp teams default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
   #pragma omp target
   #pragma omp teams default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -21,7 +30,7 @@ int main(int argc, char **argv) {
   #pragma omp teams default (shared), default(shared) // expected-error {{directive '#pragma omp teams' cannot contain more than one 'default' clause}}
   foo();
   #pragma omp target
-  #pragma omp teams default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   foo();
 
   #pragma omp target
@@ -32,5 +41,14 @@ int main(int argc, char **argv) {
   #pragma omp teams default(none) // expected-note {{explicit data sharing attribute requested here}}
   #pragma omp parallel default(shared)
   ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
+
+#ifdef OMP51
+#pragma omp target
+#pragma omp teams default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
   return 0;
 }
diff --git a/clang/test/OpenMP/teams_distribute_default_messages.cpp b/clang/test/OpenMP/teams_distribute_default_messages.cpp
index 7f000208303b7..1d5fd40c53a6b 100644
--- a/clang/test/OpenMP/teams_distribute_default_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target
   #pragma omp teams distribute default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp teams distribute default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
   #pragma omp teams distribute default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -21,12 +30,21 @@ int main(int argc, char **argv) {
   #pragma omp teams distribute default (shared), default(shared) // expected-error {{directive '#pragma omp teams distribute' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
   #pragma omp target
   #pragma omp teams distribute default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#ifdef OMP51
+#pragma omp target
+#pragma omp teams distribute default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (int i = 0; i < 200; i++) {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp
index 2c46623985070..3a414543be806 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp %s -Wuninitialized
+
+// RUN: %clang_cc1 -verify -fopenmp-version=51 -DOMP51 -fopenmp-simd %s -Wuninitialized
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target
   #pragma omp teams distribute parallel for default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute parallel for default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp teams distribute parallel for default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute parallel for default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute parallel for default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
   #pragma omp teams distribute parallel for default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -21,12 +30,21 @@ int main(int argc, char **argv) {
   #pragma omp teams distribute parallel for default (shared), default(shared) // expected-error {{directive '#pragma omp teams distribute parallel for' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute parallel for default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute parallel for default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
   #pragma omp target
   #pragma omp teams distribute parallel for default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#ifdef OMP51
+#pragma omp target
+#pragma omp teams distribute parallel for default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (int i = 0; i < 200; i++) {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp
index 93017a8233ffe..ce7f35b479592 100644
--- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_default_messages.cpp
@@ -2,17 +2,26 @@
 
 // RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
 
+// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized -fopenmp-version=51 -DOMP51
+
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized -fopenmp-version=51 -DOMP51
+
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target
   #pragma omp teams distribute parallel for simd default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute parallel for simd default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp teams distribute parallel for simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute parallel for simd default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute parallel for simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
   #pragma omp teams distribute parallel for simd default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -21,12 +30,21 @@ int main(int argc, char **argv) {
   #pragma omp teams distribute parallel for simd default (shared), default(shared) // expected-error {{directive '#pragma omp teams distribute parallel for simd' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute parallel for simd default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute parallel for simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
   #pragma omp target
   #pragma omp teams distribute parallel for simd default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#ifdef OMP51
+#pragma omp target
+#pragma omp teams distribute parallel for simd default(firstprivate) // expected-note 2 {{explicit data sharing attribute requested here}}
+  for (int i = 0; i < 200; i++) {
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+  }
+#endif
+
   return 0;
 }
diff --git a/clang/test/OpenMP/teams_distribute_simd_default_messages.cpp b/clang/test/OpenMP/teams_distribute_simd_default_messages.cpp
index 2775210ae048f..11f5d1cd1fc8f 100644
--- a/clang/test/OpenMP/teams_distribute_simd_default_messages.cpp
+++ b/clang/test/OpenMP/teams_distribute_simd_default_messages.cpp
@@ -1,18 +1,23 @@
-// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp %s -Wuninitialized -fopenmp-version=51
 
-// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized
+// RUN: %clang_cc1 -verify -fopenmp-simd %s -Wuninitialized -fopenmp-version=51
 
 void foo();
 
+namespace {
+static int y = 0;
+}
+static int x = 0;
+
 int main(int argc, char **argv) {
   #pragma omp target
   #pragma omp teams distribute simd default // expected-error {{expected '(' after 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute simd default ( // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
+#pragma omp teams distribute simd default( // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}} expected-error {{expected ')'}} expected-note {{to match this '('}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute simd default () // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute simd default() // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
   #pragma omp teams distribute simd default (none // expected-error {{expected ')'}} expected-note {{to match this '('}}
@@ -21,12 +26,22 @@ int main(int argc, char **argv) {
   #pragma omp teams distribute simd default (shared), default(shared) // expected-error {{directive '#pragma omp teams distribute simd' cannot contain more than one 'default' clause}}
   for (int i=0; i<200; i++) foo();
   #pragma omp target
-  #pragma omp teams distribute simd default (x) // expected-error {{expected 'none' or 'shared' in OpenMP clause 'default'}}
+#pragma omp teams distribute simd default(x) // expected-error {{expected 'none', 'shared' or 'firstprivate' in OpenMP clause 'default'}}
   for (int i=0; i<200; i++) foo();
 
   #pragma omp target
   #pragma omp teams distribute simd default(none) // expected-note {{explicit data sharing attribute requested here}}
   for (int i=0; i<200; i++) ++argc; // expected-error {{variable 'argc' must have explicitly specified data sharing attributes}}
 
+#pragma omp target
+#pragma omp teams distribute simd default(firstprivate) // expected-note {{explicit data sharing attribute requested here}}
+  for (int i = 0; i < 200; i++)
+    ++x; // expected-error {{variable 'x' must have explicitly specified data sharing attributes}}
+
+#pragma omp target
+#pragma omp teams distribute simd default(firstprivate) // expected-note {{explicit data sharing attribute requested here}}
+  for (int i = 0; i < 200; i++)
+    ++y; // expected-error {{variable 'y' must have explicitly specified data sharing attributes}}
+
   return 0;
 }
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
index aeb4fd098d224..687908043a8d3 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNarrowingTest.cpp
@@ -103,9 +103,9 @@ TEST(IsExpandedFromMacro, ShouldMatchFromCommandLine) {
   StringRef input = R"cc(
     void Test() { FOUR_PLUS_FOUR; }
   )cc";
-  EXPECT_TRUE(matchesConditionally(input,
-                                   binaryOperator(isExpandedFromMacro("FOUR_PLUS_FOUR")),
-                                   true, {"-std=c++11", "-DFOUR_PLUS_FOUR=4+4"}));
+  EXPECT_TRUE(matchesConditionally(
+      input, binaryOperator(isExpandedFromMacro("FOUR_PLUS_FOUR")), true,
+      {"-std=c++11", "-DFOUR_PLUS_FOUR=4+4"}));
 }
 
 TEST(IsExpandedFromMacro, ShouldNotMatchBeginOnly) {
@@ -143,31 +143,31 @@ TEST(IsExpandedFromMacro, ShouldNotMatchDifferentInstances) {
 }
 
 TEST(AllOf, AllOverloadsWork) {
-  const char Program[] =
-      "struct T { };"
-      "int f(int, T*, int, int);"
-      "void g(int x) { T t; f(x, &t, 3, 4); }";
-  EXPECT_TRUE(matches(Program,
-      callExpr(allOf(callee(functionDecl(hasName("f"))),
-                     hasArgument(0, declRefExpr(to(varDecl())))))));
-  EXPECT_TRUE(matches(Program,
-      callExpr(allOf(callee(functionDecl(hasName("f"))),
-                     hasArgument(0, declRefExpr(to(varDecl()))),
-                     hasArgument(1, hasType(pointsTo(
-                                        recordDecl(hasName("T")))))))));
-  EXPECT_TRUE(matches(Program,
-      callExpr(allOf(callee(functionDecl(hasName("f"))),
-                     hasArgument(0, declRefExpr(to(varDecl()))),
-                     hasArgument(1, hasType(pointsTo(
-                                        recordDecl(hasName("T"))))),
-                     hasArgument(2, integerLiteral(equals(3)))))));
-  EXPECT_TRUE(matches(Program,
-      callExpr(allOf(callee(functionDecl(hasName("f"))),
-                     hasArgument(0, declRefExpr(to(varDecl()))),
-                     hasArgument(1, hasType(pointsTo(
-                                        recordDecl(hasName("T"))))),
-                     hasArgument(2, integerLiteral(equals(3))),
-                     hasArgument(3, integerLiteral(equals(4)))))));
+  const char Program[] = "struct T { };"
+                         "int f(int, T*, int, int);"
+                         "void g(int x) { T t; f(x, &t, 3, 4); }";
+  EXPECT_TRUE(matches(
+      Program, callExpr(allOf(callee(functionDecl(hasName("f"))),
+                              hasArgument(0, declRefExpr(to(varDecl())))))));
+  EXPECT_TRUE(matches(
+      Program,
+      callExpr(
+          allOf(callee(functionDecl(hasName("f"))),
+                hasArgument(0, declRefExpr(to(varDecl()))),
+                hasArgument(1, hasType(pointsTo(recordDecl(hasName("T")))))))));
+  EXPECT_TRUE(matches(
+      Program, callExpr(allOf(
+                   callee(functionDecl(hasName("f"))),
+                   hasArgument(0, declRefExpr(to(varDecl()))),
+                   hasArgument(1, hasType(pointsTo(recordDecl(hasName("T"))))),
+                   hasArgument(2, integerLiteral(equals(3)))))));
+  EXPECT_TRUE(matches(
+      Program, callExpr(allOf(
+                   callee(functionDecl(hasName("f"))),
+                   hasArgument(0, declRefExpr(to(varDecl()))),
+                   hasArgument(1, hasType(pointsTo(recordDecl(hasName("T"))))),
+                   hasArgument(2, integerLiteral(equals(3))),
+                   hasArgument(3, integerLiteral(equals(4)))))));
 }
 
 TEST(DeclarationMatcher, MatchHas) {
@@ -176,127 +176,103 @@ TEST(DeclarationMatcher, MatchHas) {
   EXPECT_TRUE(matches("class X {};", HasClassX));
 
   DeclarationMatcher YHasClassX =
-    recordDecl(hasName("Y"), has(recordDecl(hasName("X"))));
+      recordDecl(hasName("Y"), has(recordDecl(hasName("X"))));
   EXPECT_TRUE(matches("class Y { class X {}; };", YHasClassX));
   EXPECT_TRUE(notMatches("class X {};", YHasClassX));
-  EXPECT_TRUE(
-    notMatches("class Y { class Z { class X {}; }; };", YHasClassX));
+  EXPECT_TRUE(notMatches("class Y { class Z { class X {}; }; };", YHasClassX));
 }
 
 TEST(DeclarationMatcher, MatchHasRecursiveAllOf) {
   DeclarationMatcher Recursive =
-    recordDecl(
-      has(recordDecl(
-        has(recordDecl(hasName("X"))),
-        has(recordDecl(hasName("Y"))),
-        hasName("Z"))),
-      has(recordDecl(
-        has(recordDecl(hasName("A"))),
-        has(recordDecl(hasName("B"))),
-        hasName("C"))),
-      hasName("F"));
-
-  EXPECT_TRUE(matches(
-    "class F {"
-      "  class Z {"
-      "    class X {};"
-      "    class Y {};"
-      "  };"
-      "  class C {"
-      "    class A {};"
-      "    class B {};"
-      "  };"
-      "};", Recursive));
-
-  EXPECT_TRUE(matches(
-    "class F {"
-      "  class Z {"
-      "    class A {};"
-      "    class X {};"
-      "    class Y {};"
-      "  };"
-      "  class C {"
-      "    class X {};"
-      "    class A {};"
-      "    class B {};"
-      "  };"
-      "};", Recursive));
-
-  EXPECT_TRUE(matches(
-    "class O1 {"
-      "  class O2 {"
-      "    class F {"
-      "      class Z {"
-      "        class A {};"
-      "        class X {};"
-      "        class Y {};"
-      "      };"
-      "      class C {"
-      "        class X {};"
-      "        class A {};"
-      "        class B {};"
-      "      };"
-      "    };"
-      "  };"
-      "};", Recursive));
+      recordDecl(has(recordDecl(has(recordDecl(hasName("X"))),
+                                has(recordDecl(hasName("Y"))), hasName("Z"))),
+                 has(recordDecl(has(recordDecl(hasName("A"))),
+                                has(recordDecl(hasName("B"))), hasName("C"))),
+                 hasName("F"));
+
+  EXPECT_TRUE(matches("class F {"
+                      "  class Z {"
+                      "    class X {};"
+                      "    class Y {};"
+                      "  };"
+                      "  class C {"
+                      "    class A {};"
+                      "    class B {};"
+                      "  };"
+                      "};",
+                      Recursive));
+
+  EXPECT_TRUE(matches("class F {"
+                      "  class Z {"
+                      "    class A {};"
+                      "    class X {};"
+                      "    class Y {};"
+                      "  };"
+                      "  class C {"
+                      "    class X {};"
+                      "    class A {};"
+                      "    class B {};"
+                      "  };"
+                      "};",
+                      Recursive));
+
+  EXPECT_TRUE(matches("class O1 {"
+                      "  class O2 {"
+                      "    class F {"
+                      "      class Z {"
+                      "        class A {};"
+                      "        class X {};"
+                      "        class Y {};"
+                      "      };"
+                      "      class C {"
+                      "        class X {};"
+                      "        class A {};"
+                      "        class B {};"
+                      "      };"
+                      "    };"
+                      "  };"
+                      "};",
+                      Recursive));
 }
 
 TEST(DeclarationMatcher, MatchHasRecursiveAnyOf) {
-  DeclarationMatcher Recursive =
-    recordDecl(
-      anyOf(
-        has(recordDecl(
-          anyOf(
-            has(recordDecl(
-              hasName("X"))),
-            has(recordDecl(
-              hasName("Y"))),
-            hasName("Z")))),
-        has(recordDecl(
-          anyOf(
-            hasName("C"),
-            has(recordDecl(
-              hasName("A"))),
-            has(recordDecl(
-              hasName("B")))))),
-        hasName("F")));
+  DeclarationMatcher Recursive = recordDecl(
+      anyOf(has(recordDecl(anyOf(has(recordDecl(hasName("X"))),
+                                 has(recordDecl(hasName("Y"))), hasName("Z")))),
+            has(recordDecl(anyOf(hasName("C"), has(recordDecl(hasName("A"))),
+                                 has(recordDecl(hasName("B")))))),
+            hasName("F")));
 
   EXPECT_TRUE(matches("class F {};", Recursive));
   EXPECT_TRUE(matches("class Z {};", Recursive));
   EXPECT_TRUE(matches("class C {};", Recursive));
   EXPECT_TRUE(matches("class M { class N { class X {}; }; };", Recursive));
   EXPECT_TRUE(matches("class M { class N { class B {}; }; };", Recursive));
-  EXPECT_TRUE(
-    matches("class O1 { class O2 {"
-              "  class M { class N { class B {}; }; }; "
-              "}; };", Recursive));
+  EXPECT_TRUE(matches("class O1 { class O2 {"
+                      "  class M { class N { class B {}; }; }; "
+                      "}; };",
+                      Recursive));
 }
 
 TEST(DeclarationMatcher, MatchNot) {
   DeclarationMatcher NotClassX =
-    cxxRecordDecl(
-      isDerivedFrom("Y"),
-      unless(hasName("X")));
+      cxxRecordDecl(isDerivedFrom("Y"), unless(hasName("X")));
   EXPECT_TRUE(notMatches("", NotClassX));
   EXPECT_TRUE(notMatches("class Y {};", NotClassX));
   EXPECT_TRUE(matches("class Y {}; class Z : public Y {};", NotClassX));
   EXPECT_TRUE(notMatches("class Y {}; class X : public Y {};", NotClassX));
   EXPECT_TRUE(
-    notMatches("class Y {}; class Z {}; class X : public Y {};",
-               NotClassX));
+      notMatches("class Y {}; class Z {}; class X : public Y {};", NotClassX));
 
   DeclarationMatcher ClassXHasNotClassY =
-    recordDecl(
-      hasName("X"),
-      has(recordDecl(hasName("Z"))),
-      unless(
-        has(recordDecl(hasName("Y")))));
+      recordDecl(hasName("X"), has(recordDecl(hasName("Z"))),
+                 unless(has(recordDecl(hasName("Y")))));
   EXPECT_TRUE(matches("class X { class Z {}; };", ClassXHasNotClassY));
-  EXPECT_TRUE(notMatches("class X { class Y {}; class Z {}; };",
-                         ClassXHasNotClassY));
+  EXPECT_TRUE(
+      notMatches("class X { class Y {}; class Z {}; };", ClassXHasNotClassY));
 
   DeclarationMatcher NamedNotRecord =
-    namedDecl(hasName("Foo"), unless(recordDecl()));
+      namedDecl(hasName("Foo"), unless(recordDecl()));
   EXPECT_TRUE(matches("void Foo(){}", NamedNotRecord));
   EXPECT_TRUE(notMatches("struct Foo {};", NamedNotRecord));
 }
@@ -318,67 +294,61 @@ TEST(CastExpression, HasCastKind) {
 
 TEST(DeclarationMatcher, HasDescendant) {
   DeclarationMatcher ZDescendantClassX =
-    recordDecl(
-      hasDescendant(recordDecl(hasName("X"))),
-      hasName("Z"));
+      recordDecl(hasDescendant(recordDecl(hasName("X"))), hasName("Z"));
   EXPECT_TRUE(matches("class Z { class X {}; };", ZDescendantClassX));
   EXPECT_TRUE(
-    matches("class Z { class Y { class X {}; }; };", ZDescendantClassX));
+      matches("class Z { class Y { class X {}; }; };", ZDescendantClassX));
+  EXPECT_TRUE(matches("class Z { class A { class Y { class X {}; }; }; };",
+                      ZDescendantClassX));
   EXPECT_TRUE(
-    matches("class Z { class A { class Y { class X {}; }; }; };",
-            ZDescendantClassX));
-  EXPECT_TRUE(
-    matches("class Z { class A { class B { class Y { class X {}; }; }; }; };",
-            ZDescendantClassX));
+      matches("class Z { class A { class B { class Y { class X {}; }; }; }; };",
+              ZDescendantClassX));
   EXPECT_TRUE(notMatches("class Z {};", ZDescendantClassX));
 
-  DeclarationMatcher ZDescendantClassXHasClassY =
-    recordDecl(
-      hasDescendant(recordDecl(has(recordDecl(hasName("Y"))),
-                               hasName("X"))),
+  DeclarationMatcher ZDescendantClassXHasClassY = recordDecl(
+      hasDescendant(recordDecl(has(recordDecl(hasName("Y"))), hasName("X"))),
       hasName("Z"));
   EXPECT_TRUE(matches("class Z { class X { class Y {}; }; };",
                       ZDescendantClassXHasClassY));
   EXPECT_TRUE(
-    matches("class Z { class A { class B { class X { class Y {}; }; }; }; };",
-            ZDescendantClassXHasClassY));
-  EXPECT_TRUE(notMatches(
-    "class Z {"
-      "  class A {"
-      "    class B {"
-      "      class X {"
-      "        class C {"
-      "          class Y {};"
-      "        };"
-      "      };"
-      "    }; "
-      "  };"
-      "};", ZDescendantClassXHasClassY));
+      matches("class Z { class A { class B { class X { class Y {}; }; }; }; };",
+              ZDescendantClassXHasClassY));
+  EXPECT_TRUE(notMatches("class Z {"
+                         "  class A {"
+                         "    class B {"
+                         "      class X {"
+                         "        class C {"
+                         "          class Y {};"
+                         "        };"
+                         "      };"
+                         "    }; "
+                         "  };"
+                         "};",
+                         ZDescendantClassXHasClassY));
 
   DeclarationMatcher ZDescendantClassXDescendantClassY =
-    recordDecl(
-      hasDescendant(recordDecl(hasDescendant(recordDecl(hasName("Y"))),
-                               hasName("X"))),
-      hasName("Z"));
-  EXPECT_TRUE(
-    matches("class Z { class A { class X { class B { class Y {}; }; }; }; };",
-            ZDescendantClassXDescendantClassY));
-  EXPECT_TRUE(matches(
-    "class Z {"
-      "  class A {"
-      "    class X {"
-      "      class B {"
-      "        class Y {};"
-      "      };"
-      "      class Y {};"
-      "    };"
-      "  };"
-      "};", ZDescendantClassXDescendantClassY));
+      recordDecl(hasDescendant(recordDecl(
+                     hasDescendant(recordDecl(hasName("Y"))), hasName("X"))),
+                 hasName("Z"));
+  EXPECT_TRUE(
+      matches("class Z { class A { class X { class B { class Y {}; }; }; }; };",
+              ZDescendantClassXDescendantClassY));
+  EXPECT_TRUE(matches("class Z {"
+                      "  class A {"
+                      "    class X {"
+                      "      class B {"
+                      "        class Y {};"
+                      "      };"
+                      "      class Y {};"
+                      "    };"
+                      "  };"
+                      "};",
+                      ZDescendantClassXDescendantClassY));
 }
 
 TEST(DeclarationMatcher, HasDescendantMemoization) {
   DeclarationMatcher CannotMemoize =
-    decl(hasDescendant(typeLoc().bind("x")), has(decl()));
+      decl(hasDescendant(typeLoc().bind("x")), has(decl()));
   EXPECT_TRUE(matches("void f() { int i; }", CannotMemoize));
 }
 
@@ -401,39 +371,36 @@ TEST(DeclarationMatcher, HasAncestorMemoization) {
   // That node can't be memoized so we have to check for it before trying to put
   // it on the cache.
   DeclarationMatcher CannotMemoize = classTemplateSpecializationDecl(
-    hasAnyTemplateArgument(templateArgument().bind("targ")),
-    forEach(fieldDecl(hasAncestor(forStmt()))));
+      hasAnyTemplateArgument(templateArgument().bind("targ")),
+      forEach(fieldDecl(hasAncestor(forStmt()))));
 
   EXPECT_TRUE(notMatches("template  struct S;"
-                           "template <> struct S{ int i; int j; };",
+                         "template <> struct S{ int i; int j; };",
                          CannotMemoize));
 }
 
 TEST(DeclarationMatcher, HasAttr) {
   EXPECT_TRUE(matches("struct __attribute__((warn_unused)) X {};",
                       decl(hasAttr(clang::attr::WarnUnused))));
-  EXPECT_FALSE(matches("struct X {};",
-                       decl(hasAttr(clang::attr::WarnUnused))));
+  EXPECT_FALSE(matches("struct X {};", decl(hasAttr(clang::attr::WarnUnused))));
 }
 
-
 TEST(DeclarationMatcher, MatchAnyOf) {
   DeclarationMatcher YOrZDerivedFromX = cxxRecordDecl(
-    anyOf(hasName("Y"), allOf(isDerivedFrom("X"), hasName("Z"))));
+      anyOf(hasName("Y"), allOf(isDerivedFrom("X"), hasName("Z"))));
   EXPECT_TRUE(matches("class X {}; class Z : public X {};", YOrZDerivedFromX));
   EXPECT_TRUE(matches("class Y {};", YOrZDerivedFromX));
   EXPECT_TRUE(
-    notMatches("class X {}; class W : public X {};", YOrZDerivedFromX));
+      notMatches("class X {}; class W : public X {};", YOrZDerivedFromX));
   EXPECT_TRUE(notMatches("class Z {};", YOrZDerivedFromX));
 
   DeclarationMatcher XOrYOrZOrU =
-    recordDecl(anyOf(hasName("X"), hasName("Y"), hasName("Z"), hasName("U")));
+      recordDecl(anyOf(hasName("X"), hasName("Y"), hasName("Z"), hasName("U")));
   EXPECT_TRUE(matches("class X {};", XOrYOrZOrU));
   EXPECT_TRUE(notMatches("class V {};", XOrYOrZOrU));
 
-  DeclarationMatcher XOrYOrZOrUOrV =
-    recordDecl(anyOf(hasName("X"), hasName("Y"), hasName("Z"), hasName("U"),
-                     hasName("V")));
+  DeclarationMatcher XOrYOrZOrUOrV = recordDecl(anyOf(
+      hasName("X"), hasName("Y"), hasName("Z"), hasName("U"), hasName("V")));
   EXPECT_TRUE(matches("class X {};", XOrYOrZOrUOrV));
   EXPECT_TRUE(matches("class Y {};", XOrYOrZOrUOrV));
   EXPECT_TRUE(matches("class Z {};", XOrYOrZOrUOrV));
@@ -447,8 +414,8 @@ TEST(DeclarationMatcher, MatchAnyOf) {
   EXPECT_TRUE(notMatches("int F() { return 1; }", MixedTypes));
 
   EXPECT_TRUE(
-    matches("void f() try { } catch (int) { } catch (...) { }",
-            cxxCatchStmt(anyOf(hasDescendant(varDecl()), isCatchAll()))));
+      matches("void f() try { } catch (int) { } catch (...) { }",
+              cxxCatchStmt(anyOf(hasDescendant(varDecl()), isCatchAll()))));
 }
 
 TEST(DeclarationMatcher, ClassIsDerived) {
@@ -460,19 +427,17 @@ TEST(DeclarationMatcher, ClassIsDerived) {
   EXPECT_TRUE(notMatches("class Y;", IsDerivedFromX));
   EXPECT_TRUE(notMatches("", IsDerivedFromX));
   EXPECT_TRUE(matches("class X {}; template class Y : Y, X {};",
-    IsDerivedFromX));
+                      IsDerivedFromX));
   EXPECT_TRUE(matches("class X {}; template class Y : X, Y {};",
-    IsDerivedFromX));
+                      IsDerivedFromX));
 
-  DeclarationMatcher IsZDerivedFromX = cxxRecordDecl(hasName("Z"),
-    isDerivedFrom("X"));
-  EXPECT_TRUE(
-    matches(
-      "class X {};"
-      "template class Y : Y {};"
-      "template<> class Y<0> : X {};"
-      "class Z : Y<1> {};",
-      IsZDerivedFromX));
+  DeclarationMatcher IsZDerivedFromX =
+      cxxRecordDecl(hasName("Z"), isDerivedFrom("X"));
+  EXPECT_TRUE(matches("class X {};"
+                      "template class Y : Y {};"
+                      "template<> class Y<0> : X {};"
+                      "class Z : Y<1> {};",
+                      IsZDerivedFromX));
 
   DeclarationMatcher IsDirectlyDerivedFromX =
       cxxRecordDecl(isDirectlyDerivedFrom("X"));
@@ -493,145 +458,138 @@ TEST(DeclarationMatcher, ClassIsDerived) {
   EXPECT_TRUE(notMatches("", IsAX));
 
   DeclarationMatcher ZIsDerivedFromX =
-    cxxRecordDecl(hasName("Z"), isDerivedFrom("X"));
+      cxxRecordDecl(hasName("Z"), isDerivedFrom("X"));
   DeclarationMatcher ZIsDirectlyDerivedFromX =
       cxxRecordDecl(hasName("Z"), isDirectlyDerivedFrom("X"));
   EXPECT_TRUE(
-    matches("class X {}; class Y : public X {}; class Z : public Y {};",
-            ZIsDerivedFromX));
+      matches("class X {}; class Y : public X {}; class Z : public Y {};",
+              ZIsDerivedFromX));
   EXPECT_TRUE(
       notMatches("class X {}; class Y : public X {}; class Z : public Y {};",
                  ZIsDirectlyDerivedFromX));
-  EXPECT_TRUE(
-    matches("class X {};"
-              "template class Y : public X {};"
-              "class Z : public Y {};", ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class X {};"
+                      "template class Y : public X {};"
+                      "class Z : public Y {};",
+                      ZIsDerivedFromX));
   EXPECT_TRUE(notMatches("class X {};"
                          "template class Y : public X {};"
                          "class Z : public Y {};",
                          ZIsDirectlyDerivedFromX));
   EXPECT_TRUE(matches("class X {}; template class Z : public X {};",
                       ZIsDerivedFromX));
+  EXPECT_TRUE(matches("template class X {}; "
+                      "template class Z : public X {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("template class X {}; "
+                      "template class Z : public X {};",
+                      ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("template class X {}; "
-              "template class Z : public X {};",
-            ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("template class X {}; "
-              "template class Z : public X {};",
-            ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("template class A { class Z : public X {}; };",
-               ZIsDerivedFromX));
+      notMatches("template class A { class Z : public X {}; };",
+                 ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("template class A { public: class Z : public X {}; }; "
-              "class X{}; void y() { A::Z z; }", ZIsDerivedFromX));
+      matches("template class A { public: class Z : public X {}; }; "
+              "class X{}; void y() { A::Z z; }",
+              ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("template  class X {}; "
+      matches("template  class X {}; "
               "template class A { class Z : public X {}; };",
-            ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("template class X> class A { "
-                 "  class Z : public X {}; };", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("template class X> class A { "
-              "  public: class Z : public X {}; }; "
-              "template class X {}; void y() { A::Z z; }",
-            ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("template class A { class Z : public X::D {}; };",
-               ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("template class A { public: "
-              "  class Z : public X::D {}; }; "
-              "class Y { public: class X {}; typedef X D; }; "
-              "void y() { A::Z z; }", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class X {}; typedef X Y; class Z : public Y {};",
-            ZIsDerivedFromX));
+              ZIsDerivedFromX));
+  EXPECT_TRUE(notMatches("template class X> class A { "
+                         "  class Z : public X {}; };",
+                         ZIsDerivedFromX));
+  EXPECT_TRUE(matches("template class X> class A { "
+                      "  public: class Z : public X {}; }; "
+                      "template class X {}; void y() { A::Z z; }",
+                      ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("template class Y { typedef typename T::U X; "
-              "  class Z : public X {}; };", ZIsDerivedFromX));
-  EXPECT_TRUE(matches("class X {}; class Z : public ::X {};",
+      notMatches("template class A { class Z : public X::D {}; };",
+                 ZIsDerivedFromX));
+  EXPECT_TRUE(matches("template class A { public: "
+                      "  class Z : public X::D {}; }; "
+                      "class Y { public: class X {}; typedef X D; }; "
+                      "void y() { A::Z z; }",
                       ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class X {}; typedef X Y; class Z : public Y {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("template class Y { typedef typename T::U X; "
+                      "  class Z : public X {}; };",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class X {}; class Z : public ::X {};", ZIsDerivedFromX));
   EXPECT_TRUE(
-    notMatches("template class X {}; "
+      notMatches("template class X {}; "
                  "template class A { class Z : public X::D {}; };",
-               ZIsDerivedFromX));
+                 ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("template class X { public: typedef X D; }; "
+      matches("template class X { public: typedef X D; }; "
               "template class A { public: "
               "  class Z : public X::D {}; }; void y() { A::Z z; }",
-            ZIsDerivedFromX));
+              ZIsDerivedFromX));
   EXPECT_TRUE(
-    notMatches("template class A { class Z : public X::D::E {}; };",
-               ZIsDerivedFromX));
+      notMatches("template class A { class Z : public X::D::E {}; };",
+                 ZIsDerivedFromX));
   EXPECT_TRUE(
-    matches("class X {}; typedef X V; typedef V W; class Z : public W {};",
-            ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class X {}; class Y : public X {}; "
-              "typedef Y V; typedef V W; class Z : public W {};",
-            ZIsDerivedFromX));
+      matches("class X {}; typedef X V; typedef V W; class Z : public W {};",
+              ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class X {}; class Y : public X {}; "
+                      "typedef Y V; typedef V W; class Z : public W {};",
+                      ZIsDerivedFromX));
   EXPECT_TRUE(notMatches("class X {}; class Y : public X {}; "
                          "typedef Y V; typedef V W; class Z : public W {};",
                          ZIsDirectlyDerivedFromX));
   EXPECT_TRUE(
-    matches("template class X {}; "
+      matches("template class X {}; "
               "template class A { class Z : public X {}; };",
-            ZIsDerivedFromX));
+              ZIsDerivedFromX));
   EXPECT_TRUE(
-    notMatches("template class D { typedef X A; typedef A B; "
+      notMatches("template class D { typedef X A; typedef A B; "
                  "  typedef B C; class Z : public C {}; };",
-               ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class X {}; typedef X A; typedef A B; "
-              "class Z : public B {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class X {}; typedef X A; typedef A B; typedef B C; "
-              "class Z : public C {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class U {}; typedef U X; typedef X V; "
-              "class Z : public V {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class Base {}; typedef Base X; "
-              "class Z : public Base {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class Base {}; typedef Base Base2; typedef Base2 X; "
-              "class Z : public Base {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("class Base {}; class Base2 {}; typedef Base2 X; "
-                 "class Z : public Base {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    matches("class A {}; typedef A X; typedef A Y; "
-              "class Z : public Y {};", ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("template  class Z;"
-                 "template <> class Z {};"
-                 "template  class Z : public Z {};",
-               IsDerivedFromX));
-  EXPECT_TRUE(
-    matches("template  class X;"
-              "template <> class X {};"
-              "template  class X : public X {};",
-            IsDerivedFromX));
-  EXPECT_TRUE(matches(
-    "class X {};"
-      "template  class Z;"
-      "template <> class Z {};"
-      "template  class Z : public Z, public X {};",
-    ZIsDerivedFromX));
-  EXPECT_TRUE(
-    notMatches("template struct X;"
+                 ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class X {}; typedef X A; typedef A B; "
+                      "class Z : public B {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class X {}; typedef X A; typedef A B; typedef B C; "
+                      "class Z : public C {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class U {}; typedef U X; typedef X V; "
+                      "class Z : public V {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class Base {}; typedef Base X; "
+                      "class Z : public Base {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class Base {}; typedef Base Base2; typedef Base2 X; "
+                      "class Z : public Base {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(notMatches("class Base {}; class Base2 {}; typedef Base2 X; "
+                         "class Z : public Base {};",
+                         ZIsDerivedFromX));
+  EXPECT_TRUE(matches("class A {}; typedef A X; typedef A Y; "
+                      "class Z : public Y {};",
+                      ZIsDerivedFromX));
+  EXPECT_TRUE(notMatches("template  class Z;"
+                         "template <> class Z {};"
+                         "template  class Z : public Z {};",
+                         IsDerivedFromX));
+  EXPECT_TRUE(matches("template  class X;"
+                      "template <> class X {};"
+                      "template  class X : public X {};",
+                      IsDerivedFromX));
+  EXPECT_TRUE(
+      matches("class X {};"
+              "template  class Z;"
+              "template <> class Z {};"
+              "template  class Z : public Z, public X {};",
+              ZIsDerivedFromX));
+  EXPECT_TRUE(
+      notMatches("template struct X;"
                  "template struct X : public X {};",
-               cxxRecordDecl(isDerivedFrom(recordDecl(hasName("Some"))))));
+                 cxxRecordDecl(isDerivedFrom(recordDecl(hasName("Some"))))));
   EXPECT_TRUE(matches(
-    "struct A {};"
+      "struct A {};"
       "template struct X;"
       "template struct X : public X {};"
       "template<> struct X<0> : public A {};"
       "struct B : public X<42> {};",
-    cxxRecordDecl(hasName("B"), isDerivedFrom(recordDecl(hasName("A"))))));
+      cxxRecordDecl(hasName("B"), isDerivedFrom(recordDecl(hasName("A"))))));
   EXPECT_TRUE(notMatches(
       "struct A {};"
       "template struct X;"
@@ -645,7 +603,7 @@ TEST(DeclarationMatcher, ClassIsDerived) {
   // get rid of the Variable(...) matching and match the right template
   // declarations directly.
   const char *RecursiveTemplateOneParameter =
-    "class Base1 {}; class Base2 {};"
+      "class Base1 {}; class Base2 {};"
       "template  class Z;"
       "template <> class Z : public Base1 {};"
       "template <> class Z : public Base2 {};"
@@ -654,21 +612,21 @@ TEST(DeclarationMatcher, ClassIsDerived) {
       "template  class Z : public Z, public Z {};"
       "void f() { Z z_float; Z z_double; Z z_char; }";
   EXPECT_TRUE(matches(
-    RecursiveTemplateOneParameter,
-    varDecl(hasName("z_float"),
-            hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base1")))))));
+      RecursiveTemplateOneParameter,
+      varDecl(hasName("z_float"),
+              hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base1")))))));
   EXPECT_TRUE(notMatches(
-    RecursiveTemplateOneParameter,
-    varDecl(hasName("z_float"),
-            hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base2")))))));
-  EXPECT_TRUE(matches(
-    RecursiveTemplateOneParameter,
-    varDecl(hasName("z_char"),
-            hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base1"),
-                                                 isDerivedFrom("Base2")))))));
+      RecursiveTemplateOneParameter,
+      varDecl(hasName("z_float"),
+              hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base2")))))));
+  EXPECT_TRUE(
+      matches(RecursiveTemplateOneParameter,
+              varDecl(hasName("z_char"),
+                      hasInitializer(hasType(cxxRecordDecl(
+                          isDerivedFrom("Base1"), isDerivedFrom("Base2")))))));
 
   const char *RecursiveTemplateTwoParameters =
-    "class Base1 {}; class Base2 {};"
+      "class Base1 {}; class Base2 {};"
       "template  class Z;"
       "template  class Z : public Base1 {};"
       "template  class Z : public Base2 {};"
@@ -679,34 +637,31 @@ TEST(DeclarationMatcher, ClassIsDerived) {
       "void f() { Z z_float; Z z_double; "
       "           Z z_char; }";
   EXPECT_TRUE(matches(
-    RecursiveTemplateTwoParameters,
-    varDecl(hasName("z_float"),
-            hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base1")))))));
-  EXPECT_TRUE(notMatches(
-    RecursiveTemplateTwoParameters,
-    varDecl(hasName("z_float"),
-            hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base2")))))));
-  EXPECT_TRUE(matches(
-    RecursiveTemplateTwoParameters,
-    varDecl(hasName("z_char"),
-            hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base1"),
-                                                 isDerivedFrom("Base2")))))));
-  EXPECT_TRUE(matches(
-    "namespace ns { class X {}; class Y : public X {}; }",
-    cxxRecordDecl(isDerivedFrom("::ns::X"))));
+      RecursiveTemplateTwoParameters,
+      varDecl(hasName("z_float"),
+              hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base1")))))));
   EXPECT_TRUE(notMatches(
-    "class X {}; class Y : public X {};",
-    cxxRecordDecl(isDerivedFrom("::ns::X"))));
+      RecursiveTemplateTwoParameters,
+      varDecl(hasName("z_float"),
+              hasInitializer(hasType(cxxRecordDecl(isDerivedFrom("Base2")))))));
+  EXPECT_TRUE(
+      matches(RecursiveTemplateTwoParameters,
+              varDecl(hasName("z_char"),
+                      hasInitializer(hasType(cxxRecordDecl(
+                          isDerivedFrom("Base1"), isDerivedFrom("Base2")))))));
+  EXPECT_TRUE(matches("namespace ns { class X {}; class Y : public X {}; }",
+                      cxxRecordDecl(isDerivedFrom("::ns::X"))));
+  EXPECT_TRUE(notMatches("class X {}; class Y : public X {};",
+                         cxxRecordDecl(isDerivedFrom("::ns::X"))));
 
   EXPECT_TRUE(matches(
-    "class X {}; class Y : public X {};",
-    cxxRecordDecl(isDerivedFrom(recordDecl(hasName("X")).bind("test")))));
+      "class X {}; class Y : public X {};",
+      cxxRecordDecl(isDerivedFrom(recordDecl(hasName("X")).bind("test")))));
 
-  EXPECT_TRUE(matches(
-    "template class X {};"
-      "template using Z = X;"
-      "template  class Y : Z {};",
-    cxxRecordDecl(isDerivedFrom(namedDecl(hasName("X"))))));
+  EXPECT_TRUE(matches("template class X {};"
+                      "template using Z = X;"
+                      "template  class Y : Z {};",
+                      cxxRecordDecl(isDerivedFrom(namedDecl(hasName("X"))))));
 }
 
 TEST(DeclarationMatcher, IsDerivedFromEmptyName) {
@@ -737,24 +692,24 @@ TEST(DeclarationMatcher, ObjCClassIsDerived) {
 
   DeclarationMatcher IsDirectlyDerivedFromX =
       objcInterfaceDecl(isDirectlyDerivedFrom("X"));
-  EXPECT_TRUE(
-      matchesObjC("@interface X @end @interface Y : X @end", IsDirectlyDerivedFromX));
+  EXPECT_TRUE(matchesObjC("@interface X @end @interface Y : X @end",
+                          IsDirectlyDerivedFromX));
   EXPECT_TRUE(matchesObjC(
       "@interface X @end @interface Y<__covariant ObjectType> : X @end",
       IsDirectlyDerivedFromX));
   EXPECT_TRUE(matchesObjC(
       "@interface X @end @compatibility_alias Y X; @interface Z : Y @end",
       IsDirectlyDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface X @end typedef X Y; @interface Z : Y @end",
-      IsDirectlyDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface X @end typedef X Y; @interface Z : Y @end",
+                  IsDirectlyDerivedFromX));
   EXPECT_TRUE(notMatchesObjC("@interface X @end", IsDirectlyDerivedFromX));
   EXPECT_TRUE(notMatchesObjC("@class X;", IsDirectlyDerivedFromX));
   EXPECT_TRUE(notMatchesObjC("@class Y;", IsDirectlyDerivedFromX));
   EXPECT_TRUE(notMatchesObjC("@interface X @end @compatibility_alias Y X;",
                              IsDirectlyDerivedFromX));
-  EXPECT_TRUE(notMatchesObjC("@interface X @end typedef X Y;",
-                             IsDirectlyDerivedFromX));
+  EXPECT_TRUE(
+      notMatchesObjC("@interface X @end typedef X Y;", IsDirectlyDerivedFromX));
 
   DeclarationMatcher IsAX = objcInterfaceDecl(isSameOrDerivedFrom("X"));
   EXPECT_TRUE(matchesObjC("@interface X @end @interface Y : X @end", IsAX));
@@ -775,9 +730,9 @@ TEST(DeclarationMatcher, ObjCClassIsDerived) {
                           ZIsDerivedFromX));
   EXPECT_TRUE(matchesObjC(
       "@interface X @end typedef X Y; @interface Z : Y @end", ZIsDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface X @end typedef X Y; @interface Z : Y @end",
-      ZIsDirectlyDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface X @end typedef X Y; @interface Z : Y @end",
+                  ZIsDirectlyDerivedFromX));
   EXPECT_TRUE(matchesObjC(
       "@interface A @end typedef A X; typedef A Y; @interface Z : Y @end",
       ZIsDerivedFromX));
@@ -798,27 +753,33 @@ TEST(DeclarationMatcher, ObjCClassIsDerived) {
       ZIsDirectlyDerivedFromX));
   EXPECT_TRUE(matchesObjC(
       "@interface A @end @compatibility_alias X A; @compatibility_alias Y A;"
-      "@interface Z : Y @end", ZIsDerivedFromX));
+      "@interface Z : Y @end",
+      ZIsDerivedFromX));
   EXPECT_TRUE(matchesObjC(
       "@interface A @end @compatibility_alias X A; @compatibility_alias Y A;"
-      "@interface Z : Y @end", ZIsDirectlyDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface Y @end typedef Y X; @interface Z : X @end", ZIsDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface Y @end typedef Y X; @interface Z : X @end",
+      "@interface Z : Y @end",
       ZIsDirectlyDerivedFromX));
   EXPECT_TRUE(matchesObjC(
-      "@interface A @end @compatibility_alias Y A; typedef Y X;"
-      "@interface Z : A @end", ZIsDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface A @end @compatibility_alias Y A; typedef Y X;"
-      "@interface Z : A @end", ZIsDirectlyDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface A @end typedef A Y; @compatibility_alias X Y;"
-      "@interface Z : A @end", ZIsDerivedFromX));
-  EXPECT_TRUE(matchesObjC(
-      "@interface A @end typedef A Y; @compatibility_alias X Y;"
-      "@interface Z : A @end", ZIsDirectlyDerivedFromX));
+      "@interface Y @end typedef Y X; @interface Z : X @end", ZIsDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface Y @end typedef Y X; @interface Z : X @end",
+                  ZIsDirectlyDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface A @end @compatibility_alias Y A; typedef Y X;"
+                  "@interface Z : A @end",
+                  ZIsDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface A @end @compatibility_alias Y A; typedef Y X;"
+                  "@interface Z : A @end",
+                  ZIsDirectlyDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface A @end typedef A Y; @compatibility_alias X Y;"
+                  "@interface Z : A @end",
+                  ZIsDerivedFromX));
+  EXPECT_TRUE(
+      matchesObjC("@interface A @end typedef A Y; @compatibility_alias X Y;"
+                  "@interface Z : A @end",
+                  ZIsDirectlyDerivedFromX));
 }
 
 TEST(DeclarationMatcher, IsLambda) {
@@ -830,42 +791,41 @@ TEST(DeclarationMatcher, IsLambda) {
 TEST(Matcher, BindMatchedNodes) {
   DeclarationMatcher ClassX = has(recordDecl(hasName("::X")).bind("x"));
 
-  EXPECT_TRUE(matchAndVerifyResultTrue("class X {};",
-                                       ClassX, std::make_unique>("x")));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "class X {};", ClassX,
+      std::make_unique>("x")));
 
-  EXPECT_TRUE(matchAndVerifyResultFalse("class X {};",
-                                        ClassX, std::make_unique>("other-id")));
+  EXPECT_TRUE(matchAndVerifyResultFalse(
+      "class X {};", ClassX,
+      std::make_unique>("other-id")));
 
   TypeMatcher TypeAHasClassB = hasDeclaration(
-    recordDecl(hasName("A"), has(recordDecl(hasName("B")).bind("b"))));
+      recordDecl(hasName("A"), has(recordDecl(hasName("B")).bind("b"))));
 
-  EXPECT_TRUE(matchAndVerifyResultTrue("class A { public: A *a; class B {}; };",
-                                       TypeAHasClassB,
-                                       std::make_unique>("b")));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "class A { public: A *a; class B {}; };", TypeAHasClassB,
+      std::make_unique>("b")));
 
   StatementMatcher MethodX =
-    callExpr(callee(cxxMethodDecl(hasName("x")))).bind("x");
+      callExpr(callee(cxxMethodDecl(hasName("x")))).bind("x");
 
-  EXPECT_TRUE(matchAndVerifyResultTrue("class A { void x() { x(); } };",
-                                       MethodX,
-                                       std::make_unique>("x")));
+  EXPECT_TRUE(matchAndVerifyResultTrue(
+      "class A { void x() { x(); } };", MethodX,
+      std::make_unique>("x")));
 }
 
 TEST(Matcher, BindTheSameNameInAlternatives) {
   StatementMatcher matcher = anyOf(
-    binaryOperator(hasOperatorName("+"),
-                   hasLHS(expr().bind("x")),
-                   hasRHS(integerLiteral(equals(0)))),
-    binaryOperator(hasOperatorName("+"),
-                   hasLHS(integerLiteral(equals(0))),
-                   hasRHS(expr().bind("x"))));
+      binaryOperator(hasOperatorName("+"), hasLHS(expr().bind("x")),
+                     hasRHS(integerLiteral(equals(0)))),
+      binaryOperator(hasOperatorName("+"), hasLHS(integerLiteral(equals(0))),
+                     hasRHS(expr().bind("x"))));
 
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    // The first branch of the matcher binds x to 0 but then fails.
-    // The second branch binds x to f() and succeeds.
-    "int f() { return 0 + f(); }",
-    matcher,
-    std::make_unique>("x")));
+      // The first branch of the matcher binds x to 0 but then fails.
+      // The second branch binds x to f() and succeeds.
+      "int f() { return 0 + f(); }", matcher,
+      std::make_unique>("x")));
 }
 
 TEST(Matcher, BindsIDForMemoizedResults) {
@@ -873,48 +833,48 @@ TEST(Matcher, BindsIDForMemoizedResults) {
   // kick in.
   DeclarationMatcher ClassX = recordDecl(hasName("X")).bind("x");
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "class A { class B { class X {}; }; };",
-    DeclarationMatcher(anyOf(
-      recordDecl(hasName("A"), hasDescendant(ClassX)),
-      recordDecl(hasName("B"), hasDescendant(ClassX)))),
-    std::make_unique>("x", 2)));
+      "class A { class B { class X {}; }; };",
+      DeclarationMatcher(
+          anyOf(recordDecl(hasName("A"), hasDescendant(ClassX)),
+                recordDecl(hasName("B"), hasDescendant(ClassX)))),
+      std::make_unique>("x", 2)));
 }
 
 TEST(HasType, MatchesAsString) {
   EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z() {Y* y; y->x(); }",
-            cxxMemberCallExpr(on(hasType(asString("class Y *"))))));
+      matches("class Y { public: void x(); }; void z() {Y* y; y->x(); }",
+              cxxMemberCallExpr(on(hasType(asString("class Y *"))))));
   EXPECT_TRUE(
-    matches("class X { void x(int x) {} };",
-            cxxMethodDecl(hasParameter(0, hasType(asString("int"))))));
+      matches("class X { void x(int x) {} };",
+              cxxMethodDecl(hasParameter(0, hasType(asString("int"))))));
   EXPECT_TRUE(matches("namespace ns { struct A {}; }  struct B { ns::A a; };",
                       fieldDecl(hasType(asString("ns::A")))));
-  EXPECT_TRUE(matches("namespace { struct A {}; }  struct B { A a; };",
-                      fieldDecl(hasType(asString("struct (anonymous namespace)::A")))));
+  EXPECT_TRUE(
+      matches("namespace { struct A {}; }  struct B { A a; };",
+              fieldDecl(hasType(asString("struct (anonymous namespace)::A")))));
 }
 
 TEST(Matcher, HasOperatorNameForOverloadedOperatorCall) {
   StatementMatcher OpCallAndAnd =
-    cxxOperatorCallExpr(hasOverloadedOperatorName("&&"));
+      cxxOperatorCallExpr(hasOverloadedOperatorName("&&"));
   EXPECT_TRUE(matches("class Y { }; "
-                        "bool operator&&(Y x, Y y) { return true; }; "
-                        "Y a; Y b; bool c = a && b;", OpCallAndAnd));
+                      "bool operator&&(Y x, Y y) { return true; }; "
+                      "Y a; Y b; bool c = a && b;",
+                      OpCallAndAnd));
   StatementMatcher OpCallLessLess =
-    cxxOperatorCallExpr(hasOverloadedOperatorName("<<"));
+      cxxOperatorCallExpr(hasOverloadedOperatorName("<<"));
   EXPECT_TRUE(notMatches("class Y { }; "
-                           "bool operator&&(Y x, Y y) { return true; }; "
-                           "Y a; Y b; bool c = a && b;",
+                         "bool operator&&(Y x, Y y) { return true; }; "
+                         "Y a; Y b; bool c = a && b;",
                          OpCallLessLess));
   StatementMatcher OpStarCall =
-    cxxOperatorCallExpr(hasOverloadedOperatorName("*"));
-  EXPECT_TRUE(matches("class Y; int operator*(Y &); void f(Y &y) { *y; }",
-                      OpStarCall));
+      cxxOperatorCallExpr(hasOverloadedOperatorName("*"));
+  EXPECT_TRUE(
+      matches("class Y; int operator*(Y &); void f(Y &y) { *y; }", OpStarCall));
   DeclarationMatcher ClassWithOpStar =
-    cxxRecordDecl(hasMethod(hasOverloadedOperatorName("*")));
-  EXPECT_TRUE(matches("class Y { int operator*(); };",
-                      ClassWithOpStar));
-  EXPECT_TRUE(notMatches("class Y { void myOperator(); };",
-                         ClassWithOpStar)) ;
+      cxxRecordDecl(hasMethod(hasOverloadedOperatorName("*")));
+  EXPECT_TRUE(matches("class Y { int operator*(); };", ClassWithOpStar));
+  EXPECT_TRUE(notMatches("class Y { void myOperator(); };", ClassWithOpStar));
   DeclarationMatcher AnyOpStar = functionDecl(hasOverloadedOperatorName("*"));
   EXPECT_TRUE(matches("class Y; int operator*(Y &);", AnyOpStar));
   EXPECT_TRUE(matches("class Y { int operator*(); };", AnyOpStar));
@@ -926,23 +886,22 @@ TEST(Matcher, HasOperatorNameForOverloadedOperatorCall) {
   EXPECT_TRUE(matches("class Y { Y operator&&(Y &); };", AnyAndOp));
 }
 
-
 TEST(Matcher, NestedOverloadedOperatorCalls) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "class Y { }; "
+      "class Y { }; "
       "Y& operator&&(Y& x, Y& y) { return x; }; "
       "Y a; Y b; Y c; Y d = a && b && c;",
-    cxxOperatorCallExpr(hasOverloadedOperatorName("&&")).bind("x"),
-    std::make_unique>("x", 2)));
+      cxxOperatorCallExpr(hasOverloadedOperatorName("&&")).bind("x"),
+      std::make_unique>("x", 2)));
   EXPECT_TRUE(matches("class Y { }; "
-                        "Y& operator&&(Y& x, Y& y) { return x; }; "
-                        "Y a; Y b; Y c; Y d = a && b && c;",
+                      "Y& operator&&(Y& x, Y& y) { return x; }; "
+                      "Y a; Y b; Y c; Y d = a && b && c;",
                       cxxOperatorCallExpr(hasParent(cxxOperatorCallExpr()))));
   EXPECT_TRUE(
-    matches("class Y { }; "
+      matches("class Y { }; "
               "Y& operator&&(Y& x, Y& y) { return x; }; "
               "Y a; Y b; Y c; Y d = a && b && c;",
-            cxxOperatorCallExpr(hasDescendant(cxxOperatorCallExpr()))));
+              cxxOperatorCallExpr(hasDescendant(cxxOperatorCallExpr()))));
 }
 
 TEST(Matcher, VarDecl_Storage) {
@@ -971,9 +930,9 @@ TEST(Matcher, VarDecl_StorageDuration) {
 
   EXPECT_TRUE(matches(T, varDecl(hasName("x"), hasAutomaticStorageDuration())));
   EXPECT_TRUE(
-    notMatches(T, varDecl(hasName("y"), hasAutomaticStorageDuration())));
+      notMatches(T, varDecl(hasName("y"), hasAutomaticStorageDuration())));
   EXPECT_TRUE(
-    notMatches(T, varDecl(hasName("a"), hasAutomaticStorageDuration())));
+      notMatches(T, varDecl(hasName("a"), hasAutomaticStorageDuration())));
 
   EXPECT_TRUE(matches(T, varDecl(hasName("y"), hasStaticStorageDuration())));
   EXPECT_TRUE(matches(T, varDecl(hasName("a"), hasStaticStorageDuration())));
@@ -991,48 +950,48 @@ TEST(Matcher, VarDecl_StorageDuration) {
 }
 
 TEST(Matcher, FindsVarDeclInFunctionParameter) {
-  EXPECT_TRUE(matches(
-    "void f(int i) {}",
-    varDecl(hasName("i"))));
+  EXPECT_TRUE(matches("void f(int i) {}", varDecl(hasName("i"))));
 }
 
 TEST(UnaryExpressionOrTypeTraitExpression, MatchesCorrectType) {
-  EXPECT_TRUE(matches("void x() { int a = sizeof(a); }", sizeOfExpr(
-    hasArgumentOfType(asString("int")))));
-  EXPECT_TRUE(notMatches("void x() { int a = sizeof(a); }", sizeOfExpr(
-    hasArgumentOfType(asString("float")))));
+  EXPECT_TRUE(matches("void x() { int a = sizeof(a); }",
+                      sizeOfExpr(hasArgumentOfType(asString("int")))));
+  EXPECT_TRUE(notMatches("void x() { int a = sizeof(a); }",
+                         sizeOfExpr(hasArgumentOfType(asString("float")))));
   EXPECT_TRUE(matches(
-    "struct A {}; void x() { A a; int b = sizeof(a); }",
-    sizeOfExpr(hasArgumentOfType(hasDeclaration(recordDecl(hasName("A")))))));
-  EXPECT_TRUE(notMatches("void x() { int a = sizeof(a); }", sizeOfExpr(
-    hasArgumentOfType(hasDeclaration(recordDecl(hasName("string")))))));
+      "struct A {}; void x() { A a; int b = sizeof(a); }",
+      sizeOfExpr(hasArgumentOfType(hasDeclaration(recordDecl(hasName("A")))))));
+  EXPECT_TRUE(notMatches("void x() { int a = sizeof(a); }",
+                         sizeOfExpr(hasArgumentOfType(
+                             hasDeclaration(recordDecl(hasName("string")))))));
 }
 
 TEST(IsInteger, MatchesIntegers) {
   EXPECT_TRUE(matches("int i = 0;", varDecl(hasType(isInteger()))));
-  EXPECT_TRUE(matches(
-    "long long i = 0; void f(long long) { }; void g() {f(i);}",
-    callExpr(hasArgument(0, declRefExpr(
-      to(varDecl(hasType(isInteger()))))))));
+  EXPECT_TRUE(
+      matches("long long i = 0; void f(long long) { }; void g() {f(i);}",
+              callExpr(hasArgument(
+                  0, declRefExpr(to(varDecl(hasType(isInteger()))))))));
 }
 
 TEST(IsInteger, ReportsNoFalsePositives) {
   EXPECT_TRUE(notMatches("int *i;", varDecl(hasType(isInteger()))));
-  EXPECT_TRUE(notMatches("struct T {}; T t; void f(T *) { }; void g() {f(&t);}",
-                         callExpr(hasArgument(0, declRefExpr(
-                           to(varDecl(hasType(isInteger()))))))));
+  EXPECT_TRUE(
+      notMatches("struct T {}; T t; void f(T *) { }; void g() {f(&t);}",
+                 callExpr(hasArgument(
+                     0, declRefExpr(to(varDecl(hasType(isInteger()))))))));
 }
 
 TEST(IsSignedInteger, MatchesSignedIntegers) {
   EXPECT_TRUE(matches("int i = 0;", varDecl(hasType(isSignedInteger()))));
-  EXPECT_TRUE(notMatches("unsigned i = 0;",
-                         varDecl(hasType(isSignedInteger()))));
+  EXPECT_TRUE(
+      notMatches("unsigned i = 0;", varDecl(hasType(isSignedInteger()))));
 }
 
 TEST(IsUnsignedInteger, MatchesUnsignedIntegers) {
   EXPECT_TRUE(notMatches("int i = 0;", varDecl(hasType(isUnsignedInteger()))));
-  EXPECT_TRUE(matches("unsigned i = 0;",
-                      varDecl(hasType(isUnsignedInteger()))));
+  EXPECT_TRUE(
+      matches("unsigned i = 0;", varDecl(hasType(isUnsignedInteger()))));
 }
 
 TEST(IsAnyPointer, MatchesPointers) {
@@ -1059,8 +1018,8 @@ TEST(IsAnyCharacter, ReportsNoFalsePositives) {
 TEST(IsArrow, MatchesMemberVariablesViaArrow) {
   EXPECT_TRUE(matches("class Y { void x() { this->y; } int y; };",
                       memberExpr(isArrow())));
-  EXPECT_TRUE(matches("class Y { void x() { y; } int y; };",
-                      memberExpr(isArrow())));
+  EXPECT_TRUE(
+      matches("class Y { void x() { y; } int y; };", memberExpr(isArrow())));
   EXPECT_TRUE(notMatches("class Y { void x() { (*this).y; } int y; };",
                          memberExpr(isArrow())));
   EXPECT_TRUE(matches("template  class Y { void x() { this->m; } };",
@@ -1080,10 +1039,9 @@ TEST(IsArrow, MatchesStaticMemberVariablesViaArrow) {
 }
 
 TEST(IsArrow, MatchesMemberCallsViaArrow) {
-  EXPECT_TRUE(matches("class Y { void x() { this->x(); } };",
-                      memberExpr(isArrow())));
-  EXPECT_TRUE(matches("class Y { void x() { x(); } };",
-                      memberExpr(isArrow())));
+  EXPECT_TRUE(
+      matches("class Y { void x() { this->x(); } };", memberExpr(isArrow())));
+  EXPECT_TRUE(matches("class Y { void x() { x(); } };", memberExpr(isArrow())));
   EXPECT_TRUE(notMatches("class Y { void x() { Y y; y.x(); } };",
                          memberExpr(isArrow())));
   EXPECT_TRUE(
@@ -1128,20 +1086,18 @@ TEST(Matcher, ParameterCount) {
 }
 
 TEST(Matcher, References) {
-  DeclarationMatcher ReferenceClassX = varDecl(
-    hasType(references(recordDecl(hasName("X")))));
-  EXPECT_TRUE(matches("class X {}; void y(X y) { X &x = y; }",
-                      ReferenceClassX));
+  DeclarationMatcher ReferenceClassX =
+      varDecl(hasType(references(recordDecl(hasName("X")))));
   EXPECT_TRUE(
-    matches("class X {}; void y(X y) { const X &x = y; }", ReferenceClassX));
+      matches("class X {}; void y(X y) { X &x = y; }", ReferenceClassX));
+  EXPECT_TRUE(
+      matches("class X {}; void y(X y) { const X &x = y; }", ReferenceClassX));
   // The match here is on the implicit copy constructor code for
   // class X, not on code 'X x = y'.
+  EXPECT_TRUE(matches("class X {}; void y(X y) { X x = y; }", ReferenceClassX));
+  EXPECT_TRUE(notMatches("class X {}; extern X x;", ReferenceClassX));
   EXPECT_TRUE(
-    matches("class X {}; void y(X y) { X x = y; }", ReferenceClassX));
-  EXPECT_TRUE(
-    notMatches("class X {}; extern X x;", ReferenceClassX));
-  EXPECT_TRUE(
-    notMatches("class X {}; void y(X *y) { X *&x = y; }", ReferenceClassX));
+      notMatches("class X {}; void y(X *y) { X *&x = y; }", ReferenceClassX));
 }
 
 TEST(QualType, hasLocalQualifiers) {
@@ -1149,16 +1105,15 @@ TEST(QualType, hasLocalQualifiers) {
                          varDecl(hasType(hasLocalQualifiers()))));
   EXPECT_TRUE(matches("int *const j = nullptr;",
                       varDecl(hasType(hasLocalQualifiers()))));
-  EXPECT_TRUE(matches("int *volatile k;",
-                      varDecl(hasType(hasLocalQualifiers()))));
-  EXPECT_TRUE(notMatches("int m;",
-                         varDecl(hasType(hasLocalQualifiers()))));
+  EXPECT_TRUE(
+      matches("int *volatile k;", varDecl(hasType(hasLocalQualifiers()))));
+  EXPECT_TRUE(notMatches("int m;", varDecl(hasType(hasLocalQualifiers()))));
 }
 
 TEST(IsExternC, MatchesExternCFunctionDeclarations) {
   EXPECT_TRUE(matches("extern \"C\" void f() {}", functionDecl(isExternC())));
-  EXPECT_TRUE(matches("extern \"C\" { void f() {} }",
-                      functionDecl(isExternC())));
+  EXPECT_TRUE(
+      matches("extern \"C\" { void f() {} }", functionDecl(isExternC())));
   EXPECT_TRUE(notMatches("void f() {}", functionDecl(isExternC())));
 }
 
@@ -1186,7 +1141,7 @@ TEST(IsDefaulted, MatchesDefaultedFunctionDeclarations) {
 
 TEST(IsDeleted, MatchesDeletedFunctionDeclarations) {
   EXPECT_TRUE(
-    notMatches("void Func();", functionDecl(hasName("Func"), isDeleted())));
+      notMatches("void Func();", functionDecl(hasName("Func"), isDeleted())));
   EXPECT_TRUE(matches("void Func() = delete;",
                       functionDecl(hasName("Func"), isDeleted())));
 }
@@ -1195,14 +1150,15 @@ TEST(IsNoThrow, MatchesNoThrowFunctionDeclarations) {
   EXPECT_TRUE(notMatches("void f();", functionDecl(isNoThrow())));
   EXPECT_TRUE(notMatches("void f() throw(int);", functionDecl(isNoThrow())));
   EXPECT_TRUE(
-    notMatches("void f() noexcept(false);", functionDecl(isNoThrow())));
+      notMatches("void f() noexcept(false);", functionDecl(isNoThrow())));
   EXPECT_TRUE(matches("void f() throw();", functionDecl(isNoThrow())));
   EXPECT_TRUE(matches("void f() noexcept;", functionDecl(isNoThrow())));
 
   EXPECT_TRUE(notMatches("void f();", functionProtoType(isNoThrow())));
-  EXPECT_TRUE(notMatches("void f() throw(int);", functionProtoType(isNoThrow())));
   EXPECT_TRUE(
-    notMatches("void f() noexcept(false);", functionProtoType(isNoThrow())));
+      notMatches("void f() throw(int);", functionProtoType(isNoThrow())));
+  EXPECT_TRUE(
+      notMatches("void f() noexcept(false);", functionProtoType(isNoThrow())));
   EXPECT_TRUE(matches("void f() throw();", functionProtoType(isNoThrow())));
   EXPECT_TRUE(matches("void f() noexcept;", functionProtoType(isNoThrow())));
 }
@@ -1249,41 +1205,41 @@ TEST(hasInitStatement, MatchesRangeForInitializers) {
 
 TEST(TemplateArgumentCountIs, Matches) {
   EXPECT_TRUE(
-    matches("template struct C {}; C c;",
-            classTemplateSpecializationDecl(templateArgumentCountIs(1))));
+      matches("template struct C {}; C c;",
+              classTemplateSpecializationDecl(templateArgumentCountIs(1))));
   EXPECT_TRUE(
-    notMatches("template struct C {}; C c;",
-               classTemplateSpecializationDecl(templateArgumentCountIs(2))));
+      notMatches("template struct C {}; C c;",
+                 classTemplateSpecializationDecl(templateArgumentCountIs(2))));
 
   EXPECT_TRUE(matches("template struct C {}; C c;",
                       templateSpecializationType(templateArgumentCountIs(1))));
   EXPECT_TRUE(
-    notMatches("template struct C {}; C c;",
-               templateSpecializationType(templateArgumentCountIs(2))));
+      notMatches("template struct C {}; C c;",
+                 templateSpecializationType(templateArgumentCountIs(2))));
 }
 
 TEST(IsIntegral, Matches) {
-  EXPECT_TRUE(matches("template struct C {}; C<42> c;",
-                      classTemplateSpecializationDecl(
-                        hasAnyTemplateArgument(isIntegral()))));
+  EXPECT_TRUE(matches(
+      "template struct C {}; C<42> c;",
+      classTemplateSpecializationDecl(hasAnyTemplateArgument(isIntegral()))));
   EXPECT_TRUE(notMatches("template struct C {}; C c;",
                          classTemplateSpecializationDecl(hasAnyTemplateArgument(
-                           templateArgument(isIntegral())))));
+                             templateArgument(isIntegral())))));
 }
 
 TEST(EqualsIntegralValue, Matches) {
   EXPECT_TRUE(matches("template struct C {}; C<42> c;",
                       classTemplateSpecializationDecl(
-                        hasAnyTemplateArgument(equalsIntegralValue("42")))));
+                          hasAnyTemplateArgument(equalsIntegralValue("42")))));
   EXPECT_TRUE(matches("template struct C {}; C<-42> c;",
                       classTemplateSpecializationDecl(
-                        hasAnyTemplateArgument(equalsIntegralValue("-42")))));
+                          hasAnyTemplateArgument(equalsIntegralValue("-42")))));
   EXPECT_TRUE(matches("template struct C {}; C<-0042> c;",
                       classTemplateSpecializationDecl(
-                        hasAnyTemplateArgument(equalsIntegralValue("-34")))));
+                          hasAnyTemplateArgument(equalsIntegralValue("-34")))));
   EXPECT_TRUE(notMatches("template struct C {}; C<42> c;",
                          classTemplateSpecializationDecl(hasAnyTemplateArgument(
-                           equalsIntegralValue("0042")))));
+                             equalsIntegralValue("0042")))));
 }
 
 TEST(Matcher, MatchesAccessSpecDecls) {
@@ -1304,7 +1260,7 @@ TEST(Matcher, MatchesFinal) {
                       cxxMethodDecl(isFinal())));
   EXPECT_TRUE(notMatches("class X {};", cxxRecordDecl(isFinal())));
   EXPECT_TRUE(
-    notMatches("class X { virtual void f(); };", cxxMethodDecl(isFinal())));
+      notMatches("class X { virtual void f(); };", cxxMethodDecl(isFinal())));
 }
 
 TEST(Matcher, MatchesVirtualMethod) {
@@ -1315,12 +1271,12 @@ TEST(Matcher, MatchesVirtualMethod) {
 
 TEST(Matcher, MatchesVirtualAsWrittenMethod) {
   EXPECT_TRUE(matches("class A { virtual int f(); };"
-                        "class B : public A { int f(); };",
+                      "class B : public A { int f(); };",
                       cxxMethodDecl(isVirtualAsWritten(), hasName("::A::f"))));
   EXPECT_TRUE(
-    notMatches("class A { virtual int f(); };"
+      notMatches("class A { virtual int f(); };"
                  "class B : public A { int f(); };",
-               cxxMethodDecl(isVirtualAsWritten(), hasName("::B::f"))));
+                 cxxMethodDecl(isVirtualAsWritten(), hasName("::B::f"))));
 }
 
 TEST(Matcher, MatchesPureMethod) {
@@ -1358,26 +1314,26 @@ TEST(Matcher, MatchesMoveAssignmentOperator) {
 
 TEST(Matcher, MatchesConstMethod) {
   EXPECT_TRUE(
-    matches("struct A { void foo() const; };", cxxMethodDecl(isConst())));
+      matches("struct A { void foo() const; };", cxxMethodDecl(isConst())));
   EXPECT_TRUE(
-    notMatches("struct A { void foo(); };", cxxMethodDecl(isConst())));
+      notMatches("struct A { void foo(); };", cxxMethodDecl(isConst())));
 }
 
 TEST(Matcher, MatchesOverridingMethod) {
   EXPECT_TRUE(matches("class X { virtual int f(); }; "
-                        "class Y : public X { int f(); };",
+                      "class Y : public X { int f(); };",
                       cxxMethodDecl(isOverride(), hasName("::Y::f"))));
   EXPECT_TRUE(notMatches("class X { virtual int f(); }; "
-                           "class Y : public X { int f(); };",
+                         "class Y : public X { int f(); };",
                          cxxMethodDecl(isOverride(), hasName("::X::f"))));
   EXPECT_TRUE(notMatches("class X { int f(); }; "
-                           "class Y : public X { int f(); };",
+                         "class Y : public X { int f(); };",
                          cxxMethodDecl(isOverride())));
   EXPECT_TRUE(notMatches("class X { int f(); int f(int); }; ",
                          cxxMethodDecl(isOverride())));
   EXPECT_TRUE(
-    matches("template  struct Y : Base { void f() override;};",
-            cxxMethodDecl(isOverride(), hasName("::Y::f"))));
+      matches("template  struct Y : Base { void f() override;};",
+              cxxMethodDecl(isOverride(), hasName("::Y::f"))));
 }
 
 TEST(Matcher, ConstructorArgument) {
@@ -1385,44 +1341,38 @@ TEST(Matcher, ConstructorArgument) {
       ast_type_traits::TK_AsIs,
       cxxConstructExpr(hasArgument(0, declRefExpr(to(varDecl(hasName("y")))))));
 
+  EXPECT_TRUE(matches(
+      "class X { public: X(int); }; void x() { int y; X x(y); }", Constructor));
   EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { int y; X x(y); }",
-            Constructor));
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { int y; X x = X(y); }",
-            Constructor));
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { int y; X x = y; }",
-            Constructor));
+      matches("class X { public: X(int); }; void x() { int y; X x = X(y); }",
+              Constructor));
   EXPECT_TRUE(
-    notMatches("class X { public: X(int); }; void x() { int z; X x(z); }",
-               Constructor));
+      matches("class X { public: X(int); }; void x() { int y; X x = y; }",
+              Constructor));
+  EXPECT_TRUE(notMatches(
+      "class X { public: X(int); }; void x() { int z; X x(z); }", Constructor));
 
   StatementMatcher WrongIndex =
       traverse(ast_type_traits::TK_AsIs,
                cxxConstructExpr(
                    hasArgument(42, declRefExpr(to(varDecl(hasName("y")))))));
-  EXPECT_TRUE(
-    notMatches("class X { public: X(int); }; void x() { int y; X x(y); }",
-               WrongIndex));
+  EXPECT_TRUE(notMatches(
+      "class X { public: X(int); }; void x() { int y; X x(y); }", WrongIndex));
 }
 
 TEST(Matcher, ConstructorArgumentCount) {
   auto Constructor1Arg =
       traverse(ast_type_traits::TK_AsIs, cxxConstructExpr(argumentCountIs(1)));
 
+  EXPECT_TRUE(matches("class X { public: X(int); }; void x() { X x(0); }",
+                      Constructor1Arg));
+  EXPECT_TRUE(matches("class X { public: X(int); }; void x() { X x = X(0); }",
+                      Constructor1Arg));
+  EXPECT_TRUE(matches("class X { public: X(int); }; void x() { X x = 0; }",
+                      Constructor1Arg));
   EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { X x(0); }",
-            Constructor1Arg));
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { X x = X(0); }",
-            Constructor1Arg));
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { X x = 0; }",
-            Constructor1Arg));
-  EXPECT_TRUE(
-    notMatches("class X { public: X(int, int); }; void x() { X x(0, 0); }",
-               Constructor1Arg));
+      notMatches("class X { public: X(int, int); }; void x() { X x(0, 0); }",
+                 Constructor1Arg));
 }
 
 TEST(Matcher, ConstructorListInitialization) {
@@ -1430,19 +1380,16 @@ TEST(Matcher, ConstructorListInitialization) {
       traverse(ast_type_traits::TK_AsIs,
                varDecl(has(cxxConstructExpr(isListInitialization()))));
 
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { X x{0}; }",
-            ConstructorListInit));
-  EXPECT_FALSE(
-    matches("class X { public: X(int); }; void x() { X x(0); }",
-            ConstructorListInit));
+  EXPECT_TRUE(matches("class X { public: X(int); }; void x() { X x{0}; }",
+                      ConstructorListInit));
+  EXPECT_FALSE(matches("class X { public: X(int); }; void x() { X x(0); }",
+                       ConstructorListInit));
 }
 
 TEST(ConstructorDeclaration, IsImplicit) {
   // This one doesn't match because the constructor is not added by the
   // compiler (it is not needed).
-  EXPECT_TRUE(notMatches("class Foo { };",
-                         cxxConstructorDecl(isImplicit())));
+  EXPECT_TRUE(notMatches("class Foo { };", cxxConstructorDecl(isImplicit())));
   // The compiler added the implicit default constructor.
   EXPECT_TRUE(matches("class Foo { }; Foo* f = new Foo();",
                       cxxConstructorDecl(isImplicit())));
@@ -1456,8 +1403,8 @@ TEST(ConstructorDeclaration, IsImplicit) {
 TEST(ConstructorDeclaration, IsExplicit) {
   EXPECT_TRUE(matches("struct S { explicit S(int); };",
                       cxxConstructorDecl(isExplicit())));
-  EXPECT_TRUE(notMatches("struct S { S(int); };",
-                         cxxConstructorDecl(isExplicit())));
+  EXPECT_TRUE(
+      notMatches("struct S { S(int); };", cxxConstructorDecl(isExplicit())));
   EXPECT_TRUE(notMatches("template struct S { explicit(b) S(int);};",
                          cxxConstructorDecl(isExplicit()), langCxx20OrLater()));
   EXPECT_TRUE(matches("struct S { explicit(true) S(int);};",
@@ -1488,9 +1435,9 @@ TEST(DeductionGuideDeclaration, IsExplicit) {
 }
 
 TEST(ConstructorDeclaration, Kinds) {
-  EXPECT_TRUE(matches(
-      "struct S { S(); };",
-      cxxConstructorDecl(isDefaultConstructor(), unless(isImplicit()))));
+  EXPECT_TRUE(
+      matches("struct S { S(); };", cxxConstructorDecl(isDefaultConstructor(),
+                                                       unless(isImplicit()))));
   EXPECT_TRUE(notMatches(
       "struct S { S(); };",
       cxxConstructorDecl(isCopyConstructor(), unless(isImplicit()))));
@@ -1501,9 +1448,9 @@ TEST(ConstructorDeclaration, Kinds) {
   EXPECT_TRUE(notMatches(
       "struct S { S(const S&); };",
       cxxConstructorDecl(isDefaultConstructor(), unless(isImplicit()))));
-  EXPECT_TRUE(matches(
-      "struct S { S(const S&); };",
-      cxxConstructorDecl(isCopyConstructor(), unless(isImplicit()))));
+  EXPECT_TRUE(
+      matches("struct S { S(const S&); };",
+              cxxConstructorDecl(isCopyConstructor(), unless(isImplicit()))));
   EXPECT_TRUE(notMatches(
       "struct S { S(const S&); };",
       cxxConstructorDecl(isMoveConstructor(), unless(isImplicit()))));
@@ -1514,9 +1461,9 @@ TEST(ConstructorDeclaration, Kinds) {
   EXPECT_TRUE(notMatches(
       "struct S { S(S&&); };",
       cxxConstructorDecl(isCopyConstructor(), unless(isImplicit()))));
-  EXPECT_TRUE(matches(
-      "struct S { S(S&&); };",
-      cxxConstructorDecl(isMoveConstructor(), unless(isImplicit()))));
+  EXPECT_TRUE(
+      matches("struct S { S(S&&); };",
+              cxxConstructorDecl(isMoveConstructor(), unless(isImplicit()))));
 }
 
 TEST(ConstructorDeclaration, IsUserProvided) {
@@ -1527,7 +1474,7 @@ TEST(ConstructorDeclaration, IsUserProvided) {
   EXPECT_TRUE(notMatches("struct S { S() = delete; };",
                          cxxConstructorDecl(isUserProvided())));
   EXPECT_TRUE(
-    matches("struct S { S(); };", cxxConstructorDecl(isUserProvided())));
+      matches("struct S { S(); };", cxxConstructorDecl(isUserProvided())));
   EXPECT_TRUE(matches("struct S { S(); }; S::S(){}",
                       cxxConstructorDecl(isUserProvided())));
 }
@@ -1538,11 +1485,11 @@ TEST(ConstructorDeclaration, IsDelegatingConstructor) {
   EXPECT_TRUE(notMatches("struct S { S(){} S(int X) : X(X) {} int X; };",
                          cxxConstructorDecl(isDelegatingConstructor())));
   EXPECT_TRUE(matches(
-    "struct S { S() : S(0) {} S(int X) : X(X) {} int X; };",
-    cxxConstructorDecl(isDelegatingConstructor(), parameterCountIs(0))));
+      "struct S { S() : S(0) {} S(int X) : X(X) {} int X; };",
+      cxxConstructorDecl(isDelegatingConstructor(), parameterCountIs(0))));
   EXPECT_TRUE(matches(
-    "struct S { S(); S(int X); int X; }; S::S(int X) : S() {}",
-    cxxConstructorDecl(isDelegatingConstructor(), parameterCountIs(1))));
+      "struct S { S(); S(int X); int X; }; S::S(int X) : S() {}",
+      cxxConstructorDecl(isDelegatingConstructor(), parameterCountIs(1))));
 }
 
 TEST(StringLiteral, HasSize) {
@@ -1584,38 +1531,28 @@ TEST(Matcher, HasNameSupportsNamespaces) {
 }
 
 TEST(Matcher, HasNameSupportsOuterClasses) {
-  EXPECT_TRUE(
-    matches("class A { class B { class C; }; };",
-            recordDecl(hasName("A::B::C"))));
-  EXPECT_TRUE(
-    matches("class A { class B { class C; }; };",
-            recordDecl(hasName("::A::B::C"))));
-  EXPECT_TRUE(
-    matches("class A { class B { class C; }; };",
-            recordDecl(hasName("B::C"))));
-  EXPECT_TRUE(
-    matches("class A { class B { class C; }; };",
-            recordDecl(hasName("C"))));
-  EXPECT_TRUE(
-    notMatches("class A { class B { class C; }; };",
-               recordDecl(hasName("c::B::C"))));
-  EXPECT_TRUE(
-    notMatches("class A { class B { class C; }; };",
-               recordDecl(hasName("A::c::C"))));
-  EXPECT_TRUE(
-    notMatches("class A { class B { class C; }; };",
-               recordDecl(hasName("A::B::A"))));
-  EXPECT_TRUE(
-    notMatches("class A { class B { class C; }; };",
-               recordDecl(hasName("::C"))));
-  EXPECT_TRUE(
-    notMatches("class A { class B { class C; }; };",
-               recordDecl(hasName("::B::C"))));
+  EXPECT_TRUE(matches("class A { class B { class C; }; };",
+                      recordDecl(hasName("A::B::C"))));
+  EXPECT_TRUE(matches("class A { class B { class C; }; };",
+                      recordDecl(hasName("::A::B::C"))));
+  EXPECT_TRUE(matches("class A { class B { class C; }; };",
+                      recordDecl(hasName("B::C"))));
+  EXPECT_TRUE(
+      matches("class A { class B { class C; }; };", recordDecl(hasName("C"))));
+  EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
+                         recordDecl(hasName("c::B::C"))));
+  EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
+                         recordDecl(hasName("A::c::C"))));
+  EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
+                         recordDecl(hasName("A::B::A"))));
+  EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
+                         recordDecl(hasName("::C"))));
+  EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
+                         recordDecl(hasName("::B::C"))));
   EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
                          recordDecl(hasName("z::A::B::C"))));
-  EXPECT_TRUE(
-    notMatches("class A { class B { class C; }; };",
-               recordDecl(hasName("A+B::C"))));
+  EXPECT_TRUE(notMatches("class A { class B { class C; }; };",
+                         recordDecl(hasName("A+B::C"))));
 }
 
 TEST(Matcher, HasNameSupportsInlinedNamespaces) {
@@ -1629,10 +1566,10 @@ TEST(Matcher, HasNameSupportsInlinedNamespaces) {
 TEST(Matcher, HasNameSupportsAnonymousNamespaces) {
   StringRef code = "namespace a { namespace { class C; } }";
   EXPECT_TRUE(
-    matches(code, recordDecl(hasName("a::(anonymous namespace)::C"))));
+      matches(code, recordDecl(hasName("a::(anonymous namespace)::C"))));
   EXPECT_TRUE(matches(code, recordDecl(hasName("a::C"))));
   EXPECT_TRUE(
-    matches(code, recordDecl(hasName("::a::(anonymous namespace)::C"))));
+      matches(code, recordDecl(hasName("::a::(anonymous namespace)::C"))));
   EXPECT_TRUE(matches(code, recordDecl(hasName("::a::C"))));
 }
 
@@ -1689,7 +1626,7 @@ TEST(Matcher, HasAnyName) {
 
   EXPECT_TRUE(notMatches(Code, recordDecl(hasAnyName("::C", "::b::C"))));
   EXPECT_TRUE(
-    matches(Code, recordDecl(hasAnyName("::C", "::b::C", "::a::b::C"))));
+      matches(Code, recordDecl(hasAnyName("::C", "::b::C", "::a::b::C"))));
 
   std::vector Names = {"::C", "::b::C", "::a::b::C"};
   EXPECT_TRUE(matches(Code, recordDecl(hasAnyName(Names))));
@@ -1697,27 +1634,27 @@ TEST(Matcher, HasAnyName) {
 
 TEST(Matcher, IsDefinition) {
   DeclarationMatcher DefinitionOfClassA =
-    recordDecl(hasName("A"), isDefinition());
+      recordDecl(hasName("A"), isDefinition());
   EXPECT_TRUE(matches("class A {};", DefinitionOfClassA));
   EXPECT_TRUE(notMatches("class A;", DefinitionOfClassA));
 
   DeclarationMatcher DefinitionOfVariableA =
-    varDecl(hasName("a"), isDefinition());
+      varDecl(hasName("a"), isDefinition());
   EXPECT_TRUE(matches("int a;", DefinitionOfVariableA));
   EXPECT_TRUE(notMatches("extern int a;", DefinitionOfVariableA));
 
   DeclarationMatcher DefinitionOfMethodA =
-    cxxMethodDecl(hasName("a"), isDefinition());
+      cxxMethodDecl(hasName("a"), isDefinition());
   EXPECT_TRUE(matches("class A { void a() {} };", DefinitionOfMethodA));
   EXPECT_TRUE(notMatches("class A { void a(); };", DefinitionOfMethodA));
 
   DeclarationMatcher DefinitionOfObjCMethodA =
-    objcMethodDecl(hasName("a"), isDefinition());
+      objcMethodDecl(hasName("a"), isDefinition());
   EXPECT_TRUE(matchesObjC("@interface A @end "
                           "@implementation A; -(void)a {} @end",
                           DefinitionOfObjCMethodA));
-  EXPECT_TRUE(notMatchesObjC("@interface A; - (void)a; @end",
-                             DefinitionOfObjCMethodA));
+  EXPECT_TRUE(
+      notMatchesObjC("@interface A; - (void)a; @end", DefinitionOfObjCMethodA));
 }
 
 TEST(Matcher, HandlesNullQualTypes) {
@@ -1728,7 +1665,7 @@ TEST(Matcher, HandlesNullQualTypes) {
   // We don't really care whether this matcher succeeds; we're testing that
   // it completes without crashing.
   EXPECT_TRUE(matches(
-    "struct A { };"
+      "struct A { };"
       "template "
       "void f(T t) {"
       "  T local_t(t /* this becomes a null QualType in the AST */);"
@@ -1736,13 +1673,10 @@ TEST(Matcher, HandlesNullQualTypes) {
       "void g() {"
       "  f(0);"
       "}",
-    expr(hasType(TypeMatcher(
-      anyOf(
-        TypeMatcher(hasDeclaration(anything())),
-        pointsTo(AnyType),
-        references(AnyType)
-        // Other QualType matchers should go here.
-      ))))));
+      expr(hasType(TypeMatcher(anyOf(TypeMatcher(hasDeclaration(anything())),
+                                     pointsTo(AnyType), references(AnyType)
+                                     // Other QualType matchers should go here.
+                                     ))))));
 }
 
 TEST(ObjCIvarRefExprMatcher, IvarExpr) {
@@ -1750,10 +1684,10 @@ TEST(ObjCIvarRefExprMatcher, IvarExpr) {
       "@interface A @end "
       "@implementation A { A *x; } - (void) func { x = 0; } @end";
   EXPECT_TRUE(matchesObjC(ObjCString, objcIvarRefExpr()));
-  EXPECT_TRUE(matchesObjC(ObjCString, objcIvarRefExpr(
-        hasDeclaration(namedDecl(hasName("x"))))));
-  EXPECT_FALSE(matchesObjC(ObjCString, objcIvarRefExpr(
-        hasDeclaration(namedDecl(hasName("y"))))));
+  EXPECT_TRUE(matchesObjC(
+      ObjCString, objcIvarRefExpr(hasDeclaration(namedDecl(hasName("x"))))));
+  EXPECT_FALSE(matchesObjC(
+      ObjCString, objcIvarRefExpr(hasDeclaration(namedDecl(hasName("y"))))));
 }
 
 TEST(BlockExprMatcher, BlockExpr) {
@@ -1761,24 +1695,19 @@ TEST(BlockExprMatcher, BlockExpr) {
 }
 
 TEST(StatementCountIs, FindsNoStatementsInAnEmptyCompoundStatement) {
-  EXPECT_TRUE(matches("void f() { }",
-                      compoundStmt(statementCountIs(0))));
-  EXPECT_TRUE(notMatches("void f() {}",
-                         compoundStmt(statementCountIs(1))));
+  EXPECT_TRUE(matches("void f() { }", compoundStmt(statementCountIs(0))));
+  EXPECT_TRUE(notMatches("void f() {}", compoundStmt(statementCountIs(1))));
 }
 
 TEST(StatementCountIs, AppearsToMatchOnlyOneCount) {
-  EXPECT_TRUE(matches("void f() { 1; }",
-                      compoundStmt(statementCountIs(1))));
-  EXPECT_TRUE(notMatches("void f() { 1; }",
-                         compoundStmt(statementCountIs(0))));
-  EXPECT_TRUE(notMatches("void f() { 1; }",
-                         compoundStmt(statementCountIs(2))));
+  EXPECT_TRUE(matches("void f() { 1; }", compoundStmt(statementCountIs(1))));
+  EXPECT_TRUE(notMatches("void f() { 1; }", compoundStmt(statementCountIs(0))));
+  EXPECT_TRUE(notMatches("void f() { 1; }", compoundStmt(statementCountIs(2))));
 }
 
 TEST(StatementCountIs, WorksWithMultipleStatements) {
-  EXPECT_TRUE(matches("void f() { 1; 2; 3; }",
-                      compoundStmt(statementCountIs(3))));
+  EXPECT_TRUE(
+      matches("void f() { 1; 2; 3; }", compoundStmt(statementCountIs(3))));
 }
 
 TEST(StatementCountIs, WorksWithNestedCompoundStatements) {
@@ -1806,19 +1735,19 @@ TEST(Member, DoesNotMatchTheBaseExpression) {
 
 TEST(Member, MatchesInMemberFunctionCall) {
   EXPECT_TRUE(matches("void f() {"
-                        "  struct { void first() {}; } s;"
-                        "  s.first();"
-                        "};",
+                      "  struct { void first() {}; } s;"
+                      "  s.first();"
+                      "};",
                       memberExpr(member(hasName("first")))));
 }
 
 TEST(Member, MatchesMember) {
-  EXPECT_TRUE(matches(
-    "struct A { int i; }; void f() { A a; a.i = 2; }",
-    memberExpr(hasDeclaration(fieldDecl(hasType(isInteger()))))));
-  EXPECT_TRUE(notMatches(
-    "struct A { float f; }; void f() { A a; a.f = 2.0f; }",
-    memberExpr(hasDeclaration(fieldDecl(hasType(isInteger()))))));
+  EXPECT_TRUE(
+      matches("struct A { int i; }; void f() { A a; a.i = 2; }",
+              memberExpr(hasDeclaration(fieldDecl(hasType(isInteger()))))));
+  EXPECT_TRUE(
+      notMatches("struct A { float f; }; void f() { A a; a.f = 2.0f; }",
+                 memberExpr(hasDeclaration(fieldDecl(hasType(isInteger()))))));
 }
 
 TEST(Member, BitFields) {
@@ -1841,26 +1770,26 @@ TEST(Member, InClassInitializer) {
 }
 
 TEST(Member, UnderstandsAccess) {
-  EXPECT_TRUE(matches(
-    "struct A { int i; };", fieldDecl(isPublic(), hasName("i"))));
-  EXPECT_TRUE(notMatches(
-    "struct A { int i; };", fieldDecl(isProtected(), hasName("i"))));
-  EXPECT_TRUE(notMatches(
-    "struct A { int i; };", fieldDecl(isPrivate(), hasName("i"))));
+  EXPECT_TRUE(
+      matches("struct A { int i; };", fieldDecl(isPublic(), hasName("i"))));
+  EXPECT_TRUE(notMatches("struct A { int i; };",
+                         fieldDecl(isProtected(), hasName("i"))));
+  EXPECT_TRUE(
+      notMatches("struct A { int i; };", fieldDecl(isPrivate(), hasName("i"))));
 
-  EXPECT_TRUE(notMatches(
-    "class A { int i; };", fieldDecl(isPublic(), hasName("i"))));
-  EXPECT_TRUE(notMatches(
-    "class A { int i; };", fieldDecl(isProtected(), hasName("i"))));
-  EXPECT_TRUE(matches(
-    "class A { int i; };", fieldDecl(isPrivate(), hasName("i"))));
+  EXPECT_TRUE(
+      notMatches("class A { int i; };", fieldDecl(isPublic(), hasName("i"))));
+  EXPECT_TRUE(notMatches("class A { int i; };",
+                         fieldDecl(isProtected(), hasName("i"))));
+  EXPECT_TRUE(
+      matches("class A { int i; };", fieldDecl(isPrivate(), hasName("i"))));
 
-  EXPECT_TRUE(notMatches(
-    "class A { protected: int i; };", fieldDecl(isPublic(), hasName("i"))));
+  EXPECT_TRUE(notMatches("class A { protected: int i; };",
+                         fieldDecl(isPublic(), hasName("i"))));
   EXPECT_TRUE(matches("class A { protected: int i; };",
                       fieldDecl(isProtected(), hasName("i"))));
-  EXPECT_TRUE(notMatches(
-    "class A { protected: int i; };", fieldDecl(isPrivate(), hasName("i"))));
+  EXPECT_TRUE(notMatches("class A { protected: int i; };",
+                         fieldDecl(isPrivate(), hasName("i"))));
 
   // Non-member decls have the AccessSpecifier AS_none and thus aren't matched.
   EXPECT_TRUE(notMatches("int i;", varDecl(isPublic(), hasName("i"))));
@@ -1883,35 +1812,35 @@ TEST(hasDynamicExceptionSpec, MatchesDynamicExceptionSpecifications) {
   EXPECT_TRUE(
       matches("void l() throw(...);", functionDecl(hasDynamicExceptionSpec())));
 
-  EXPECT_TRUE(notMatches("void f();", functionProtoType(hasDynamicExceptionSpec())));
+  EXPECT_TRUE(
+      notMatches("void f();", functionProtoType(hasDynamicExceptionSpec())));
   EXPECT_TRUE(notMatches("void g() noexcept;",
                          functionProtoType(hasDynamicExceptionSpec())));
   EXPECT_TRUE(notMatches("void h() noexcept(true);",
                          functionProtoType(hasDynamicExceptionSpec())));
   EXPECT_TRUE(notMatches("void i() noexcept(false);",
                          functionProtoType(hasDynamicExceptionSpec())));
-  EXPECT_TRUE(
-      matches("void j() throw();", functionProtoType(hasDynamicExceptionSpec())));
-  EXPECT_TRUE(
-      matches("void k() throw(int);", functionProtoType(hasDynamicExceptionSpec())));
-  EXPECT_TRUE(
-      matches("void l() throw(...);", functionProtoType(hasDynamicExceptionSpec())));
+  EXPECT_TRUE(matches("void j() throw();",
+                      functionProtoType(hasDynamicExceptionSpec())));
+  EXPECT_TRUE(matches("void k() throw(int);",
+                      functionProtoType(hasDynamicExceptionSpec())));
+  EXPECT_TRUE(matches("void l() throw(...);",
+                      functionProtoType(hasDynamicExceptionSpec())));
 }
 
 TEST(HasObjectExpression, DoesNotMatchMember) {
   EXPECT_TRUE(notMatches(
-    "class X {}; struct Z { X m; }; void f(Z z) { z.m; }",
-    memberExpr(hasObjectExpression(hasType(recordDecl(hasName("X")))))));
+      "class X {}; struct Z { X m; }; void f(Z z) { z.m; }",
+      memberExpr(hasObjectExpression(hasType(recordDecl(hasName("X")))))));
 }
 
 TEST(HasObjectExpression, MatchesBaseOfVariable) {
   EXPECT_TRUE(matches(
-    "struct X { int m; }; void f(X x) { x.m; }",
-    memberExpr(hasObjectExpression(hasType(recordDecl(hasName("X")))))));
-  EXPECT_TRUE(matches(
-    "struct X { int m; }; void f(X* x) { x->m; }",
-    memberExpr(hasObjectExpression(
-      hasType(pointsTo(recordDecl(hasName("X"))))))));
+      "struct X { int m; }; void f(X x) { x.m; }",
+      memberExpr(hasObjectExpression(hasType(recordDecl(hasName("X")))))));
+  EXPECT_TRUE(matches("struct X { int m; }; void f(X* x) { x->m; }",
+                      memberExpr(hasObjectExpression(
+                          hasType(pointsTo(recordDecl(hasName("X"))))))));
   EXPECT_TRUE(matches("template  struct X { void f() { T t; t.m; } };",
                       cxxDependentScopeMemberExpr(hasObjectExpression(
                           declRefExpr(to(namedDecl(hasName("t"))))))));
@@ -1936,14 +1865,12 @@ TEST(HasObjectExpression, MatchesBaseOfMemberFunc) {
 
 TEST(HasObjectExpression,
      MatchesObjectExpressionOfImplicitlyFormedMemberExpression) {
-  EXPECT_TRUE(matches(
-    "class X {}; struct S { X m; void f() { this->m; } };",
-    memberExpr(hasObjectExpression(
-      hasType(pointsTo(recordDecl(hasName("S"))))))));
-  EXPECT_TRUE(matches(
-    "class X {}; struct S { X m; void f() { m; } };",
-    memberExpr(hasObjectExpression(
-      hasType(pointsTo(recordDecl(hasName("S"))))))));
+  EXPECT_TRUE(matches("class X {}; struct S { X m; void f() { this->m; } };",
+                      memberExpr(hasObjectExpression(
+                          hasType(pointsTo(recordDecl(hasName("S"))))))));
+  EXPECT_TRUE(matches("class X {}; struct S { X m; void f() { m; } };",
+                      memberExpr(hasObjectExpression(
+                          hasType(pointsTo(recordDecl(hasName("S"))))))));
 }
 
 TEST(Field, DoesNotMatchNonFieldMembers) {
@@ -1958,17 +1885,17 @@ TEST(Field, MatchesField) {
 }
 
 TEST(IsVolatileQualified, QualifiersMatch) {
-  EXPECT_TRUE(matches("volatile int i = 42;",
-                      varDecl(hasType(isVolatileQualified()))));
-  EXPECT_TRUE(notMatches("volatile int *i;",
-                         varDecl(hasType(isVolatileQualified()))));
+  EXPECT_TRUE(
+      matches("volatile int i = 42;", varDecl(hasType(isVolatileQualified()))));
+  EXPECT_TRUE(
+      notMatches("volatile int *i;", varDecl(hasType(isVolatileQualified()))));
   EXPECT_TRUE(matches("typedef volatile int v_int; v_int i = 42;",
                       varDecl(hasType(isVolatileQualified()))));
 }
 
 TEST(IsConstQualified, MatchesConstInt) {
-  EXPECT_TRUE(matches("const int i = 42;",
-                      varDecl(hasType(isConstQualified()))));
+  EXPECT_TRUE(
+      matches("const int i = 42;", varDecl(hasType(isConstQualified()))));
 }
 
 TEST(IsConstQualified, MatchesConstPointer) {
@@ -1986,43 +1913,41 @@ TEST(IsConstQualified, MatchesThroughTypedef) {
 TEST(IsConstQualified, DoesNotMatchInappropriately) {
   EXPECT_TRUE(notMatches("typedef int nonconst_int; nonconst_int i = 42;",
                          varDecl(hasType(isConstQualified()))));
-  EXPECT_TRUE(notMatches("int const* p;",
-                         varDecl(hasType(isConstQualified()))));
+  EXPECT_TRUE(
+      notMatches("int const* p;", varDecl(hasType(isConstQualified()))));
 }
 
 TEST(DeclCount, DeclCountIsCorrect) {
-  EXPECT_TRUE(matches("void f() {int i,j;}",
-                      declStmt(declCountIs(2))));
-  EXPECT_TRUE(notMatches("void f() {int i,j; int k;}",
-                         declStmt(declCountIs(3))));
-  EXPECT_TRUE(notMatches("void f() {int i,j, k, l;}",
-                         declStmt(declCountIs(3))));
+  EXPECT_TRUE(matches("void f() {int i,j;}", declStmt(declCountIs(2))));
+  EXPECT_TRUE(
+      notMatches("void f() {int i,j; int k;}", declStmt(declCountIs(3))));
+  EXPECT_TRUE(
+      notMatches("void f() {int i,j, k, l;}", declStmt(declCountIs(3))));
 }
 
-
 TEST(EachOf, TriggersForEachMatch) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "class A { int a; int b; };",
-    recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
-                      has(fieldDecl(hasName("b")).bind("v")))),
-    std::make_unique>("v", 2)));
+      "class A { int a; int b; };",
+      recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
+                        has(fieldDecl(hasName("b")).bind("v")))),
+      std::make_unique>("v", 2)));
 }
 
 TEST(EachOf, BehavesLikeAnyOfUnlessBothMatch) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "class A { int a; int c; };",
-    recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
-                      has(fieldDecl(hasName("b")).bind("v")))),
-    std::make_unique>("v", 1)));
+      "class A { int a; int c; };",
+      recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
+                        has(fieldDecl(hasName("b")).bind("v")))),
+      std::make_unique>("v", 1)));
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "class A { int c; int b; };",
-    recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
-                      has(fieldDecl(hasName("b")).bind("v")))),
-    std::make_unique>("v", 1)));
-  EXPECT_TRUE(notMatches(
-    "class A { int c; int d; };",
-    recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
-                      has(fieldDecl(hasName("b")).bind("v"))))));
+      "class A { int c; int b; };",
+      recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
+                        has(fieldDecl(hasName("b")).bind("v")))),
+      std::make_unique>("v", 1)));
+  EXPECT_TRUE(
+      notMatches("class A { int c; int d; };",
+                 recordDecl(eachOf(has(fieldDecl(hasName("a")).bind("v")),
+                                   has(fieldDecl(hasName("b")).bind("v"))))));
 }
 
 TEST(Optionally, SubmatchersDoNotMatch) {
@@ -2056,29 +1981,30 @@ TEST(IsTemplateInstantiation, MatchesImplicitClassTemplateInstantiation) {
   // Make sure that we can both match the class by name (::X) and by the type
   // the template was instantiated with (via a field).
 
-  EXPECT_TRUE(matches(
-    "template  class X {}; class A {}; X x;",
-    cxxRecordDecl(hasName("::X"), isTemplateInstantiation())));
+  EXPECT_TRUE(
+      matches("template  class X {}; class A {}; X x;",
+              cxxRecordDecl(hasName("::X"), isTemplateInstantiation())));
 
   EXPECT_TRUE(matches(
-    "template  class X { T t; }; class A {}; X x;",
-    cxxRecordDecl(isTemplateInstantiation(), hasDescendant(
-      fieldDecl(hasType(recordDecl(hasName("A"))))))));
+      "template  class X { T t; }; class A {}; X x;",
+      cxxRecordDecl(
+          isTemplateInstantiation(),
+          hasDescendant(fieldDecl(hasType(recordDecl(hasName("A"))))))));
 }
 
 TEST(IsTemplateInstantiation, MatchesImplicitFunctionTemplateInstantiation) {
   EXPECT_TRUE(matches(
-    "template  void f(T t) {} class A {}; void g() { f(A()); }",
-    functionDecl(hasParameter(0, hasType(recordDecl(hasName("A")))),
-                 isTemplateInstantiation())));
+      "template  void f(T t) {} class A {}; void g() { f(A()); }",
+      functionDecl(hasParameter(0, hasType(recordDecl(hasName("A")))),
+                   isTemplateInstantiation())));
 }
 
 TEST(IsTemplateInstantiation, MatchesExplicitClassTemplateInstantiation) {
-  EXPECT_TRUE(matches(
-    "template  class X { T t; }; class A {};"
-      "template class X;",
-    cxxRecordDecl(isTemplateInstantiation(), hasDescendant(
-      fieldDecl(hasType(recordDecl(hasName("A"))))))));
+  EXPECT_TRUE(matches("template  class X { T t; }; class A {};"
+                      "template class X;",
+                      cxxRecordDecl(isTemplateInstantiation(),
+                                    hasDescendant(fieldDecl(
+                                        hasType(recordDecl(hasName("A"))))))));
 
   // Make sure that we match the instantiation instead of the template
   // definition by checking whether the member function is present.
@@ -2091,21 +2017,21 @@ TEST(IsTemplateInstantiation, MatchesExplicitClassTemplateInstantiation) {
 
 TEST(IsTemplateInstantiation,
      MatchesInstantiationOfPartiallySpecializedClassTemplate) {
-  EXPECT_TRUE(matches(
-    "template  class X {};"
-      "template  class X {}; class A {}; X x;",
-    cxxRecordDecl(hasName("::X"), isTemplateInstantiation())));
+  EXPECT_TRUE(
+      matches("template  class X {};"
+              "template  class X {}; class A {}; X x;",
+              cxxRecordDecl(hasName("::X"), isTemplateInstantiation())));
 }
 
 TEST(IsTemplateInstantiation,
      MatchesInstantiationOfClassTemplateNestedInNonTemplate) {
-  EXPECT_TRUE(matches(
-    "class A {};"
-      "class X {"
-      "  template  class Y { U u; };"
-      "  Y y;"
-      "};",
-    cxxRecordDecl(hasName("::X::Y"), isTemplateInstantiation())));
+  EXPECT_TRUE(
+      matches("class A {};"
+              "class X {"
+              "  template  class Y { U u; };"
+              "  Y y;"
+              "};",
+              cxxRecordDecl(hasName("::X::Y"), isTemplateInstantiation())));
 }
 
 TEST(IsTemplateInstantiation, DoesNotMatchInstantiationsInsideOfInstantiation) {
@@ -2113,31 +2039,30 @@ TEST(IsTemplateInstantiation, DoesNotMatchInstantiationsInsideOfInstantiation) {
   // normal use case as long as the uppermost instantiation always is marked
   // as template instantiation, but it might be confusing as a predicate.
   EXPECT_TRUE(matches(
-    "class A {};"
+      "class A {};"
       "template  class X {"
       "  template  class Y { U u; };"
       "  Y y;"
       "}; X x;",
-    cxxRecordDecl(hasName("::X::Y"), unless(isTemplateInstantiation()))));
+      cxxRecordDecl(hasName("::X::Y"), unless(isTemplateInstantiation()))));
 }
 
 TEST(IsTemplateInstantiation, DoesNotMatchExplicitClassTemplateSpecialization) {
-  EXPECT_TRUE(notMatches(
-    "template  class X {}; class A {};"
-      "template <> class X {}; X x;",
-    cxxRecordDecl(hasName("::X"), isTemplateInstantiation())));
+  EXPECT_TRUE(
+      notMatches("template  class X {}; class A {};"
+                 "template <> class X {}; X x;",
+                 cxxRecordDecl(hasName("::X"), isTemplateInstantiation())));
 }
 
 TEST(IsTemplateInstantiation, DoesNotMatchNonTemplate) {
-  EXPECT_TRUE(notMatches(
-    "class A {}; class Y { A a; };",
-    cxxRecordDecl(isTemplateInstantiation())));
+  EXPECT_TRUE(notMatches("class A {}; class Y { A a; };",
+                         cxxRecordDecl(isTemplateInstantiation())));
 }
 
 TEST(IsInstantiated, MatchesInstantiation) {
   EXPECT_TRUE(
-    matches("template class A { T i; }; class Y { A a; };",
-            cxxRecordDecl(isInstantiated())));
+      matches("template class A { T i; }; class Y { A a; };",
+              cxxRecordDecl(isInstantiated())));
 }
 
 TEST(IsInstantiated, NotMatchesDefinition) {
@@ -2147,7 +2072,7 @@ TEST(IsInstantiated, NotMatchesDefinition) {
 
 TEST(IsInTemplateInstantiation, MatchesInstantiationStmt) {
   EXPECT_TRUE(matches("template struct A { A() { T i; } };"
-                        "class Y { A a; }; Y y;",
+                      "class Y { A a; }; Y y;",
                       declStmt(isInTemplateInstantiation())));
 }
 
@@ -2158,8 +2083,8 @@ TEST(IsInTemplateInstantiation, NotMatchesDefinitionStmt) {
 
 TEST(IsInstantiated, MatchesFunctionInstantiation) {
   EXPECT_TRUE(
-    matches("template void A(T t) { T i; } void x() { A(0); }",
-            functionDecl(isInstantiated())));
+      matches("template void A(T t) { T i; } void x() { A(0); }",
+              functionDecl(isInstantiated())));
 }
 
 TEST(IsInstantiated, NotMatchesFunctionDefinition) {
@@ -2169,8 +2094,8 @@ TEST(IsInstantiated, NotMatchesFunctionDefinition) {
 
 TEST(IsInTemplateInstantiation, MatchesFunctionInstantiationStmt) {
   EXPECT_TRUE(
-    matches("template void A(T t) { T i; } void x() { A(0); }",
-            declStmt(isInTemplateInstantiation())));
+      matches("template void A(T t) { T i; } void x() { A(0); }",
+              declStmt(isInTemplateInstantiation())));
 }
 
 TEST(IsInTemplateInstantiation, NotMatchesFunctionDefinitionStmt) {
@@ -2183,11 +2108,11 @@ TEST(IsInTemplateInstantiation, Sharing) {
   // FIXME: Node sharing is an implementation detail, exposing it is ugly
   // and makes the matcher behave in non-obvious ways.
   EXPECT_TRUE(notMatches(
-    "int j; template void A(T t) { j += 42; } void x() { A(0); }",
-    Matcher));
+      "int j; template void A(T t) { j += 42; } void x() { A(0); }",
+      Matcher));
   EXPECT_TRUE(matches(
-    "int j; template void A(T t) { j += t; } void x() { A(0); }",
-    Matcher));
+      "int j; template void A(T t) { j += t; } void x() { A(0); }",
+      Matcher));
 }
 
 TEST(IsInstantiationDependent, MatchesNonValueTypeDependent) {
@@ -2232,48 +2157,41 @@ TEST(IsValueDependent, MatchesInstantiationDependent) {
       expr(isValueDependent())));
 }
 
-TEST(IsExplicitTemplateSpecialization,
-     DoesNotMatchPrimaryTemplate) {
-  EXPECT_TRUE(notMatches(
-    "template  class X {};",
-    cxxRecordDecl(isExplicitTemplateSpecialization())));
-  EXPECT_TRUE(notMatches(
-    "template  void f(T t);",
-    functionDecl(isExplicitTemplateSpecialization())));
+TEST(IsExplicitTemplateSpecialization, DoesNotMatchPrimaryTemplate) {
+  EXPECT_TRUE(notMatches("template  class X {};",
+                         cxxRecordDecl(isExplicitTemplateSpecialization())));
+  EXPECT_TRUE(notMatches("template  void f(T t);",
+                         functionDecl(isExplicitTemplateSpecialization())));
 }
 
 TEST(IsExplicitTemplateSpecialization,
      DoesNotMatchExplicitTemplateInstantiations) {
-  EXPECT_TRUE(notMatches(
-    "template  class X {};"
-      "template class X; extern template class X;",
-    cxxRecordDecl(isExplicitTemplateSpecialization())));
-  EXPECT_TRUE(notMatches(
-    "template  void f(T t) {}"
-      "template void f(int t); extern template void f(long t);",
-    functionDecl(isExplicitTemplateSpecialization())));
+  EXPECT_TRUE(
+      notMatches("template  class X {};"
+                 "template class X; extern template class X;",
+                 cxxRecordDecl(isExplicitTemplateSpecialization())));
+  EXPECT_TRUE(
+      notMatches("template  void f(T t) {}"
+                 "template void f(int t); extern template void f(long t);",
+                 functionDecl(isExplicitTemplateSpecialization())));
 }
 
 TEST(IsExplicitTemplateSpecialization,
      DoesNotMatchImplicitTemplateInstantiations) {
-  EXPECT_TRUE(notMatches(
-    "template  class X {}; X x;",
-    cxxRecordDecl(isExplicitTemplateSpecialization())));
-  EXPECT_TRUE(notMatches(
-    "template  void f(T t); void g() { f(10); }",
-    functionDecl(isExplicitTemplateSpecialization())));
+  EXPECT_TRUE(notMatches("template  class X {}; X x;",
+                         cxxRecordDecl(isExplicitTemplateSpecialization())));
+  EXPECT_TRUE(
+      notMatches("template  void f(T t); void g() { f(10); }",
+                 functionDecl(isExplicitTemplateSpecialization())));
 }
 
-TEST(IsExplicitTemplateSpecialization,
-     MatchesExplicitTemplateSpecializations) {
-  EXPECT_TRUE(matches(
-    "template  class X {};"
-      "template<> class X {};",
-    cxxRecordDecl(isExplicitTemplateSpecialization())));
-  EXPECT_TRUE(matches(
-    "template  void f(T t) {}"
-      "template<> void f(int t) {}",
-    functionDecl(isExplicitTemplateSpecialization())));
+TEST(IsExplicitTemplateSpecialization, MatchesExplicitTemplateSpecializations) {
+  EXPECT_TRUE(matches("template  class X {};"
+                      "template<> class X {};",
+                      cxxRecordDecl(isExplicitTemplateSpecialization())));
+  EXPECT_TRUE(matches("template  void f(T t) {}"
+                      "template<> void f(int t) {}",
+                      functionDecl(isExplicitTemplateSpecialization())));
 }
 
 TEST(TypeMatching, MatchesNoReturn) {
@@ -2314,8 +2232,8 @@ TEST(TypeMatching, MatchesNoReturn) {
 
   EXPECT_TRUE(
       matches("struct S { [[noreturn]] S(); };", functionDecl(isNoReturn())));
-  EXPECT_TRUE(matches("struct S { [[noreturn]] S() {} };",
-                      functionDecl(isNoReturn())));
+  EXPECT_TRUE(
+      matches("struct S { [[noreturn]] S() {} };", functionDecl(isNoReturn())));
 
   // ---
 
@@ -2344,14 +2262,12 @@ TEST(TypeMatching, MatchesNoReturn) {
   // ---
 
   EXPECT_TRUE(matchesC("__attribute__((noreturn)) void func();",
-                      functionDecl(isNoReturn())));
+                       functionDecl(isNoReturn())));
   EXPECT_TRUE(matchesC("__attribute__((noreturn)) void func() {}",
-                      functionDecl(isNoReturn())));
+                       functionDecl(isNoReturn())));
 
-  EXPECT_TRUE(matchesC("_Noreturn void func();",
-                      functionDecl(isNoReturn())));
-  EXPECT_TRUE(matchesC("_Noreturn void func() {}",
-                      functionDecl(isNoReturn())));
+  EXPECT_TRUE(matchesC("_Noreturn void func();", functionDecl(isNoReturn())));
+  EXPECT_TRUE(matchesC("_Noreturn void func() {}", functionDecl(isNoReturn())));
 }
 
 TEST(TypeMatching, MatchesBool) {
@@ -2383,45 +2299,42 @@ TEST(TypeMatching, MatchesArrayTypes) {
   EXPECT_TRUE(notMatches("struct A {}; A a[7];",
                          arrayType(hasElementType(builtinType()))));
 
+  EXPECT_TRUE(matches("int const a[] = { 2, 3 };",
+                      qualType(arrayType(hasElementType(builtinType())))));
   EXPECT_TRUE(matches(
-    "int const a[] = { 2, 3 };",
-    qualType(arrayType(hasElementType(builtinType())))));
-  EXPECT_TRUE(matches(
-    "int const a[] = { 2, 3 };",
-    qualType(isConstQualified(), arrayType(hasElementType(builtinType())))));
-  EXPECT_TRUE(matches(
-    "typedef const int T; T x[] = { 1, 2 };",
-    qualType(isConstQualified(), arrayType())));
+      "int const a[] = { 2, 3 };",
+      qualType(isConstQualified(), arrayType(hasElementType(builtinType())))));
+  EXPECT_TRUE(matches("typedef const int T; T x[] = { 1, 2 };",
+                      qualType(isConstQualified(), arrayType())));
 
   EXPECT_TRUE(notMatches(
-    "int a[] = { 2, 3 };",
-    qualType(isConstQualified(), arrayType(hasElementType(builtinType())))));
-  EXPECT_TRUE(notMatches(
-    "int a[] = { 2, 3 };",
-    qualType(arrayType(hasElementType(isConstQualified(), builtinType())))));
+      "int a[] = { 2, 3 };",
+      qualType(isConstQualified(), arrayType(hasElementType(builtinType())))));
   EXPECT_TRUE(notMatches(
-    "int const a[] = { 2, 3 };",
-    qualType(arrayType(hasElementType(builtinType())),
-             unless(isConstQualified()))));
+      "int a[] = { 2, 3 };",
+      qualType(arrayType(hasElementType(isConstQualified(), builtinType())))));
+  EXPECT_TRUE(notMatches("int const a[] = { 2, 3 };",
+                         qualType(arrayType(hasElementType(builtinType())),
+                                  unless(isConstQualified()))));
 
-  EXPECT_TRUE(matches("int a[2];",
-                      constantArrayType(hasElementType(builtinType()))));
+  EXPECT_TRUE(
+      matches("int a[2];", constantArrayType(hasElementType(builtinType()))));
   EXPECT_TRUE(matches("const int a = 0;", qualType(isInteger())));
 }
 
 TEST(TypeMatching, DecayedType) {
-  EXPECT_TRUE(matches("void f(int i[]);", valueDecl(hasType(decayedType(hasDecayedType(pointerType()))))));
+  EXPECT_TRUE(
+      matches("void f(int i[]);",
+              valueDecl(hasType(decayedType(hasDecayedType(pointerType()))))));
   EXPECT_TRUE(notMatches("int i[7];", decayedType()));
 }
 
 TEST(TypeMatching, MatchesComplexTypes) {
   EXPECT_TRUE(matches("_Complex float f;", complexType()));
-  EXPECT_TRUE(matches(
-    "_Complex float f;",
-    complexType(hasElementType(builtinType()))));
-  EXPECT_TRUE(notMatches(
-    "_Complex float f;",
-    complexType(hasElementType(isInteger()))));
+  EXPECT_TRUE(
+      matches("_Complex float f;", complexType(hasElementType(builtinType()))));
+  EXPECT_TRUE(notMatches("_Complex float f;",
+                         complexType(hasElementType(isInteger()))));
 }
 
 TEST(NS, Anonymous) {
@@ -2482,38 +2395,38 @@ TEST(DeclarationMatcher, InStdNamespace) {
 
 TEST(EqualsBoundNodeMatcher, QualType) {
   EXPECT_TRUE(matches(
-    "int i = 1;", varDecl(hasType(qualType().bind("type")),
-                          hasInitializer(ignoringParenImpCasts(
-                            hasType(qualType(equalsBoundNode("type"))))))));
+      "int i = 1;", varDecl(hasType(qualType().bind("type")),
+                            hasInitializer(ignoringParenImpCasts(
+                                hasType(qualType(equalsBoundNode("type"))))))));
   EXPECT_TRUE(notMatches("int i = 1.f;",
                          varDecl(hasType(qualType().bind("type")),
                                  hasInitializer(ignoringParenImpCasts(hasType(
-                                   qualType(equalsBoundNode("type"))))))));
+                                     qualType(equalsBoundNode("type"))))))));
 }
 
 TEST(EqualsBoundNodeMatcher, NonMatchingTypes) {
   EXPECT_TRUE(notMatches(
-    "int i = 1;", varDecl(namedDecl(hasName("i")).bind("name"),
-                          hasInitializer(ignoringParenImpCasts(
-                            hasType(qualType(equalsBoundNode("type"))))))));
+      "int i = 1;", varDecl(namedDecl(hasName("i")).bind("name"),
+                            hasInitializer(ignoringParenImpCasts(
+                                hasType(qualType(equalsBoundNode("type"))))))));
 }
 
 TEST(EqualsBoundNodeMatcher, Stmt) {
   EXPECT_TRUE(
-    matches("void f() { if(true) {} }",
-            stmt(allOf(ifStmt().bind("if"),
-                       hasParent(stmt(has(stmt(equalsBoundNode("if")))))))));
+      matches("void f() { if(true) {} }",
+              stmt(allOf(ifStmt().bind("if"),
+                         hasParent(stmt(has(stmt(equalsBoundNode("if")))))))));
 
   EXPECT_TRUE(notMatches(
-    "void f() { if(true) { if (true) {} } }",
-    stmt(allOf(ifStmt().bind("if"), has(stmt(equalsBoundNode("if")))))));
+      "void f() { if(true) { if (true) {} } }",
+      stmt(allOf(ifStmt().bind("if"), has(stmt(equalsBoundNode("if")))))));
 }
 
 TEST(EqualsBoundNodeMatcher, Decl) {
   EXPECT_TRUE(matches(
-    "class X { class Y {}; };",
-    decl(allOf(recordDecl(hasName("::X::Y")).bind("record"),
-               hasParent(decl(has(decl(equalsBoundNode("record")))))))));
+      "class X { class Y {}; };",
+      decl(allOf(recordDecl(hasName("::X::Y")).bind("record"),
+                 hasParent(decl(has(decl(equalsBoundNode("record")))))))));
 
   EXPECT_TRUE(notMatches("class X { class Y {}; };",
                          decl(allOf(recordDecl(hasName("::X")).bind("record"),
@@ -2522,21 +2435,21 @@ TEST(EqualsBoundNodeMatcher, Decl) {
 
 TEST(EqualsBoundNodeMatcher, Type) {
   EXPECT_TRUE(matches(
-    "class X { int a; int b; };",
-    recordDecl(
-      has(fieldDecl(hasName("a"), hasType(type().bind("t")))),
-      has(fieldDecl(hasName("b"), hasType(type(equalsBoundNode("t"))))))));
+      "class X { int a; int b; };",
+      recordDecl(
+          has(fieldDecl(hasName("a"), hasType(type().bind("t")))),
+          has(fieldDecl(hasName("b"), hasType(type(equalsBoundNode("t"))))))));
 
   EXPECT_TRUE(notMatches(
-    "class X { int a; double b; };",
-    recordDecl(
-      has(fieldDecl(hasName("a"), hasType(type().bind("t")))),
-      has(fieldDecl(hasName("b"), hasType(type(equalsBoundNode("t"))))))));
+      "class X { int a; double b; };",
+      recordDecl(
+          has(fieldDecl(hasName("a"), hasType(type().bind("t")))),
+          has(fieldDecl(hasName("b"), hasType(type(equalsBoundNode("t"))))))));
 }
 
 TEST(EqualsBoundNodeMatcher, UsingForEachDescendant) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "int f() {"
+      "int f() {"
       "  if (1) {"
       "    int i = 9;"
       "  }"
@@ -2546,63 +2459,65 @@ TEST(EqualsBoundNodeMatcher, UsingForEachDescendant) {
       "  }"
       "  return 0;"
       "}",
-    // Look for variable declarations within functions whose type is the same
-    // as the function return type.
-    functionDecl(returns(qualType().bind("type")),
-                 forEachDescendant(varDecl(hasType(
-                   qualType(equalsBoundNode("type")))).bind("decl"))),
-    // Only i and j should match, not k.
-    std::make_unique>("decl", 2)));
+      // Look for variable declarations within functions whose type is the same
+      // as the function return type.
+      functionDecl(
+          returns(qualType().bind("type")),
+          forEachDescendant(varDecl(hasType(qualType(equalsBoundNode("type"))))
+                                .bind("decl"))),
+      // Only i and j should match, not k.
+      std::make_unique>("decl", 2)));
 }
 
 TEST(EqualsBoundNodeMatcher, FiltersMatchedCombinations) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "void f() {"
+      "void f() {"
       "  int x;"
       "  double d;"
       "  x = d + x - d + x;"
       "}",
-    functionDecl(
-      hasName("f"), forEachDescendant(varDecl().bind("d")),
-      forEachDescendant(declRefExpr(to(decl(equalsBoundNode("d")))))),
-    std::make_unique>("d", 5)));
+      functionDecl(
+          hasName("f"), forEachDescendant(varDecl().bind("d")),
+          forEachDescendant(declRefExpr(to(decl(equalsBoundNode("d")))))),
+      std::make_unique>("d", 5)));
 }
 
 TEST(EqualsBoundNodeMatcher, UnlessDescendantsOfAncestorsMatch) {
   EXPECT_TRUE(matchAndVerifyResultTrue(
-    "struct StringRef { int size() const; const char* data() const; };"
+      "struct StringRef { int size() const; const char* data() const; };"
       "void f(StringRef v) {"
       "  v.data();"
       "}",
-    cxxMemberCallExpr(
-      callee(cxxMethodDecl(hasName("data"))),
-      on(declRefExpr(to(
-        varDecl(hasType(recordDecl(hasName("StringRef")))).bind("var")))),
-      unless(hasAncestor(stmt(hasDescendant(cxxMemberCallExpr(
-        callee(cxxMethodDecl(anyOf(hasName("size"), hasName("length")))),
-        on(declRefExpr(to(varDecl(equalsBoundNode("var")))))))))))
-      .bind("data"),
-    std::make_unique>("data", 1)));
+      cxxMemberCallExpr(
+          callee(cxxMethodDecl(hasName("data"))),
+          on(declRefExpr(to(
+              varDecl(hasType(recordDecl(hasName("StringRef")))).bind("var")))),
+          unless(hasAncestor(stmt(hasDescendant(cxxMemberCallExpr(
+              callee(cxxMethodDecl(anyOf(hasName("size"), hasName("length")))),
+              on(declRefExpr(to(varDecl(equalsBoundNode("var")))))))))))
+          .bind("data"),
+      std::make_unique>("data", 1)));
 
   EXPECT_FALSE(matches(
-    "struct StringRef { int size() const; const char* data() const; };"
+      "struct StringRef { int size() const; const char* data() const; };"
       "void f(StringRef v) {"
       "  v.data();"
       "  v.size();"
       "}",
-    cxxMemberCallExpr(
-      callee(cxxMethodDecl(hasName("data"))),
-      on(declRefExpr(to(
-        varDecl(hasType(recordDecl(hasName("StringRef")))).bind("var")))),
-      unless(hasAncestor(stmt(hasDescendant(cxxMemberCallExpr(
-        callee(cxxMethodDecl(anyOf(hasName("size"), hasName("length")))),
-        on(declRefExpr(to(varDecl(equalsBoundNode("var")))))))))))
-      .bind("data")));
+      cxxMemberCallExpr(
+          callee(cxxMethodDecl(hasName("data"))),
+          on(declRefExpr(to(
+              varDecl(hasType(recordDecl(hasName("StringRef")))).bind("var")))),
+          unless(hasAncestor(stmt(hasDescendant(cxxMemberCallExpr(
+              callee(cxxMethodDecl(anyOf(hasName("size"), hasName("length")))),
+              on(declRefExpr(to(varDecl(equalsBoundNode("var")))))))))))
+          .bind("data")));
 }
 
 TEST(NullPointerConstants, Basic) {
   EXPECT_TRUE(matches("#define NULL ((void *)0)\n"
-                        "void *v1 = NULL;", expr(nullPointerConstant())));
+                      "void *v1 = NULL;",
+                      expr(nullPointerConstant())));
   EXPECT_TRUE(matches("void *v2 = nullptr;", expr(nullPointerConstant())));
   EXPECT_TRUE(matches("void *v3 = __null;", expr(nullPointerConstant())));
   EXPECT_TRUE(matches("char *cp = (char *)0;", expr(nullPointerConstant())));
@@ -2635,10 +2550,10 @@ TEST(HasExternalFormalLinkage, Basic) {
 }
 
 TEST(HasDefaultArgument, Basic) {
-  EXPECT_TRUE(matches("void x(int val = 0) {}",
-                      parmVarDecl(hasDefaultArgument())));
-  EXPECT_TRUE(notMatches("void x(int val) {}",
-                      parmVarDecl(hasDefaultArgument())));
+  EXPECT_TRUE(
+      matches("void x(int val = 0) {}", parmVarDecl(hasDefaultArgument())));
+  EXPECT_TRUE(
+      notMatches("void x(int val) {}", parmVarDecl(hasDefaultArgument())));
 }
 
 TEST(IsAtPosition, Basic) {
@@ -2691,24 +2606,18 @@ TEST(HasArraySize, Basic) {
 }
 
 TEST(HasDefinition, MatchesStructDefinition) {
-  EXPECT_TRUE(matches("struct x {};",
-                      cxxRecordDecl(hasDefinition())));
-  EXPECT_TRUE(notMatches("struct x;",
-                      cxxRecordDecl(hasDefinition())));
+  EXPECT_TRUE(matches("struct x {};", cxxRecordDecl(hasDefinition())));
+  EXPECT_TRUE(notMatches("struct x;", cxxRecordDecl(hasDefinition())));
 }
 
 TEST(HasDefinition, MatchesClassDefinition) {
-  EXPECT_TRUE(matches("class x {};",
-                      cxxRecordDecl(hasDefinition())));
-  EXPECT_TRUE(notMatches("class x;",
-                      cxxRecordDecl(hasDefinition())));
+  EXPECT_TRUE(matches("class x {};", cxxRecordDecl(hasDefinition())));
+  EXPECT_TRUE(notMatches("class x;", cxxRecordDecl(hasDefinition())));
 }
 
 TEST(HasDefinition, MatchesUnionDefinition) {
-  EXPECT_TRUE(matches("union x {};",
-                      cxxRecordDecl(hasDefinition())));
-  EXPECT_TRUE(notMatches("union x;",
-                      cxxRecordDecl(hasDefinition())));
+  EXPECT_TRUE(matches("union x {};", cxxRecordDecl(hasDefinition())));
+  EXPECT_TRUE(notMatches("union x;", cxxRecordDecl(hasDefinition())));
 }
 
 TEST(IsScopedEnum, MatchesScopedEnum) {
@@ -2727,19 +2636,19 @@ TEST(HasTrailingReturn, MatchesTrailingReturn) {
   EXPECT_TRUE(matches("auto Y() -> int { return 0; }",
                       functionDecl(hasTrailingReturn())));
   EXPECT_TRUE(matches("auto X() -> int;", functionDecl(hasTrailingReturn())));
-  EXPECT_TRUE(notMatches("int X() { return 0; }",
-                      functionDecl(hasTrailingReturn())));
+  EXPECT_TRUE(
+      notMatches("int X() { return 0; }", functionDecl(hasTrailingReturn())));
   EXPECT_TRUE(notMatches("int X();", functionDecl(hasTrailingReturn())));
   EXPECT_TRUE(notMatchesC("void X();", functionDecl(hasTrailingReturn())));
 }
 
 TEST(HasTrailingReturn, MatchesLambdaTrailingReturn) {
   EXPECT_TRUE(matches(
-          "auto lambda2 = [](double x, double y) -> double {return x + y;};",
-          functionDecl(hasTrailingReturn())));
-  EXPECT_TRUE(notMatches(
-          "auto lambda2 = [](double x, double y) {return x + y;};",
-          functionDecl(hasTrailingReturn())));
+      "auto lambda2 = [](double x, double y) -> double {return x + y;};",
+      functionDecl(hasTrailingReturn())));
+  EXPECT_TRUE(
+      notMatches("auto lambda2 = [](double x, double y) {return x + y;};",
+                 functionDecl(hasTrailingReturn())));
 }
 
 TEST(IsAssignmentOperator, Basic) {
@@ -2772,23 +2681,15 @@ TEST(IsComparisonOperator, Basic) {
 }
 
 TEST(HasInit, Basic) {
-  EXPECT_TRUE(
-    matches("int x{0};",
-            initListExpr(hasInit(0, expr()))));
-  EXPECT_FALSE(
-    matches("int x{0};",
-            initListExpr(hasInit(1, expr()))));
-  EXPECT_FALSE(
-    matches("int x;",
-            initListExpr(hasInit(0, expr()))));
+  EXPECT_TRUE(matches("int x{0};", initListExpr(hasInit(0, expr()))));
+  EXPECT_FALSE(matches("int x{0};", initListExpr(hasInit(1, expr()))));
+  EXPECT_FALSE(matches("int x;", initListExpr(hasInit(0, expr()))));
 }
 
 TEST(Matcher, isMain) {
-  EXPECT_TRUE(
-    matches("int main() {}", functionDecl(isMain())));
+  EXPECT_TRUE(matches("int main() {}", functionDecl(isMain())));
 
-  EXPECT_TRUE(
-    notMatches("int main2() {}", functionDecl(isMain())));
+  EXPECT_TRUE(notMatches("int main2() {}", functionDecl(isMain())));
 }
 
 TEST(OMPExecutableDirective, isStandaloneDirective) {
@@ -2867,11 +2768,18 @@ void x() {
   EXPECT_TRUE(matchesWithOpenMP(Source3, Matcher));
 
   StringRef Source4 = R"(
+void x() {
+#pragma omp parallel default(firstprivate)
+;
+})";
+  EXPECT_TRUE(matchesWithOpenMP51(Source4, Matcher));
+
+  StringRef Source5 = R"(
 void x(int x) {
 #pragma omp parallel num_threads(x)
 ;
 })";
-  EXPECT_TRUE(matchesWithOpenMP(Source4, Matcher));
+  EXPECT_TRUE(matchesWithOpenMP(Source5, Matcher));
 }
 
 TEST(OMPDefaultClause, isNoneKind) {
@@ -2907,10 +2815,17 @@ void x() {
 
   StringRef Source4 = R"(
 void x(int x) {
+#pragma omp parallel default(firstprivate)
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP51(Source4, Matcher));
+
+  const std::string Source5 = R"(
+void x(int x) {
 #pragma omp parallel num_threads(x)
 ;
 })";
-  EXPECT_TRUE(notMatchesWithOpenMP(Source4, Matcher));
+  EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher));
 }
 
 TEST(OMPDefaultClause, isSharedKind) {
@@ -2946,10 +2861,63 @@ void x() {
 
   StringRef Source4 = R"(
 void x(int x) {
+#pragma omp parallel default(firstprivate)
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP51(Source4, Matcher));
+
+  const std::string Source5 = R"(
+void x(int x) {
 #pragma omp parallel num_threads(x)
 ;
 })";
-  EXPECT_TRUE(notMatchesWithOpenMP(Source4, Matcher));
+  EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher));
+}
+
+TEST(OMPDefaultClause, isFirstPrivateKind) {
+  auto Matcher = ompExecutableDirective(
+      hasAnyClause(ompDefaultClause(isFirstPrivateKind())));
+
+  const std::string Source0 = R"(
+void x() {
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP(Source0, Matcher));
+
+  const std::string Source1 = R"(
+void x() {
+#pragma omp parallel
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP(Source1, Matcher));
+
+  const std::string Source2 = R"(
+void x() {
+#pragma omp parallel default(shared)
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP(Source2, Matcher));
+
+  const std::string Source3 = R"(
+void x() {
+#pragma omp parallel default(none)
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP(Source3, Matcher));
+
+  const std::string Source4 = R"(
+void x(int x) {
+#pragma omp parallel default(firstprivate)
+;
+})";
+  EXPECT_TRUE(matchesWithOpenMP51(Source4, Matcher));
+
+  const std::string Source5 = R"(
+void x(int x) {
+#pragma omp parallel num_threads(x)
+;
+})";
+  EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher));
 }
 
 TEST(OMPExecutableDirective, isAllowedToContainClauseKind) {
@@ -2984,24 +2952,31 @@ void x() {
   EXPECT_TRUE(matchesWithOpenMP(Source3, Matcher));
 
   StringRef Source4 = R"(
+void x() {
+#pragma omp parallel default(firstprivate)
+;
+})";
+  EXPECT_TRUE(matchesWithOpenMP51(Source4, Matcher));
+
+  StringRef Source5 = R"(
 void x(int x) {
 #pragma omp parallel num_threads(x)
 ;
 })";
-  EXPECT_TRUE(matchesWithOpenMP(Source4, Matcher));
+  EXPECT_TRUE(matchesWithOpenMP(Source5, Matcher));
 
-  StringRef Source5 = R"(
+  StringRef Source6 = R"(
 void x() {
 #pragma omp taskyield
 })";
-  EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher));
+  EXPECT_TRUE(notMatchesWithOpenMP(Source6, Matcher));
 
-  StringRef Source6 = R"(
+  StringRef Source7 = R"(
 void x() {
 #pragma omp task
 ;
 })";
-  EXPECT_TRUE(matchesWithOpenMP(Source6, Matcher));
+  EXPECT_TRUE(matchesWithOpenMP(Source7, Matcher));
 }
 
 TEST(HasAnyBase, DirectBase) {
diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
index 59e0f74b3910b..895c8ae48adc1 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
+++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
@@ -118,13 +118,13 @@ TEST_P(ASTMatchersTest, TranslationUnitDecl) {
                    "int MyVar2;\n"
                    "}  // namespace NameSpace\n";
   EXPECT_TRUE(matches(
-    Code, varDecl(hasName("MyVar1"), hasDeclContext(translationUnitDecl()))));
+      Code, varDecl(hasName("MyVar1"), hasDeclContext(translationUnitDecl()))));
   EXPECT_FALSE(matches(
-    Code, varDecl(hasName("MyVar2"), hasDeclContext(translationUnitDecl()))));
+      Code, varDecl(hasName("MyVar2"), hasDeclContext(translationUnitDecl()))));
   EXPECT_TRUE(matches(
-    Code,
-    varDecl(hasName("MyVar2"),
-            hasDeclContext(decl(hasDeclContext(translationUnitDecl()))))));
+      Code,
+      varDecl(hasName("MyVar2"),
+              hasDeclContext(decl(hasDeclContext(translationUnitDecl()))))));
 }
 
 TEST_P(ASTMatchersTest, LinkageSpecDecl) {
@@ -158,10 +158,10 @@ TEST_P(ASTMatchersTest,
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(notMatches("template class X { };"
-                           "template<> class X { int a; };",
-                         classTemplateDecl(hasName("X"),
-                                           hasDescendant(fieldDecl(hasName("a"))))));
+  EXPECT_TRUE(notMatches(
+      "template class X { };"
+      "template<> class X { int a; };",
+      classTemplateDecl(hasName("X"), hasDescendant(fieldDecl(hasName("a"))))));
 }
 
 TEST_P(ASTMatchersTest,
@@ -169,18 +169,17 @@ TEST_P(ASTMatchersTest,
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(notMatches("template class X { };"
-                           "template class X { int a; };",
-                         classTemplateDecl(hasName("X"),
-                                           hasDescendant(fieldDecl(hasName("a"))))));
+  EXPECT_TRUE(notMatches(
+      "template class X { };"
+      "template class X { int a; };",
+      classTemplateDecl(hasName("X"), hasDescendant(fieldDecl(hasName("a"))))));
 }
 
 TEST(ASTMatchersTestCUDA, CUDAKernelCallExpr) {
   EXPECT_TRUE(matchesWithCuda("__global__ void f() { }"
-                                "void g() { f<<<1, 2>>>(); }",
+                              "void g() { f<<<1, 2>>>(); }",
                               cudaKernelCallExpr()));
-  EXPECT_TRUE(notMatchesWithCuda("void f() {}",
-                                 cudaKernelCallExpr()));
+  EXPECT_TRUE(notMatchesWithCuda("void f() {}", cudaKernelCallExpr()));
 }
 
 TEST(ASTMatchersTestCUDA, HasAttrCUDA) {
@@ -316,56 +315,50 @@ TEST_P(ASTMatchersTest, CallExpr_CXX) {
   // FIXME: Do we want to overload Call() to directly take
   // Matcher, too?
   StatementMatcher MethodX =
-    callExpr(hasDeclaration(cxxMethodDecl(hasName("x"))));
+      callExpr(hasDeclaration(cxxMethodDecl(hasName("x"))));
 
   EXPECT_TRUE(matches("class Y { void x() { x(); } };", MethodX));
   EXPECT_TRUE(notMatches("class Y { void x() {} };", MethodX));
 
   StatementMatcher MethodOnY =
-    cxxMemberCallExpr(on(hasType(recordDecl(hasName("Y")))));
+      cxxMemberCallExpr(on(hasType(recordDecl(hasName("Y")))));
 
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z() { Y y; y.x(); }",
-            MethodOnY));
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z(Y &y) { y.x(); }",
-            MethodOnY));
-  EXPECT_TRUE(
-    notMatches("class Y { public: void x(); }; void z(Y *&y) { y->x(); }",
-               MethodOnY));
-  EXPECT_TRUE(
-    notMatches("class Y { public: void x(); }; void z(Y y[]) { y->x(); }",
-               MethodOnY));
-  EXPECT_TRUE(
-    notMatches("class Y { public: void x(); }; void z() { Y *y; y->x(); }",
-               MethodOnY));
+  EXPECT_TRUE(matches("class Y { public: void x(); }; void z() { Y y; y.x(); }",
+                      MethodOnY));
+  EXPECT_TRUE(matches("class Y { public: void x(); }; void z(Y &y) { y.x(); }",
+                      MethodOnY));
+  EXPECT_TRUE(notMatches(
+      "class Y { public: void x(); }; void z(Y *&y) { y->x(); }", MethodOnY));
+  EXPECT_TRUE(notMatches(
+      "class Y { public: void x(); }; void z(Y y[]) { y->x(); }", MethodOnY));
+  EXPECT_TRUE(notMatches(
+      "class Y { public: void x(); }; void z() { Y *y; y->x(); }", MethodOnY));
 
   StatementMatcher MethodOnYPointer =
-    cxxMemberCallExpr(on(hasType(pointsTo(recordDecl(hasName("Y"))))));
+      cxxMemberCallExpr(on(hasType(pointsTo(recordDecl(hasName("Y"))))));
 
   EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z() { Y *y; y->x(); }",
-            MethodOnYPointer));
+      matches("class Y { public: void x(); }; void z() { Y *y; y->x(); }",
+              MethodOnYPointer));
   EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z(Y *&y) { y->x(); }",
-            MethodOnYPointer));
+      matches("class Y { public: void x(); }; void z(Y *&y) { y->x(); }",
+              MethodOnYPointer));
   EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z(Y y[]) { y->x(); }",
-            MethodOnYPointer));
+      matches("class Y { public: void x(); }; void z(Y y[]) { y->x(); }",
+              MethodOnYPointer));
   EXPECT_TRUE(
-    notMatches("class Y { public: void x(); }; void z() { Y y; y.x(); }",
-               MethodOnYPointer));
+      notMatches("class Y { public: void x(); }; void z() { Y y; y.x(); }",
+                 MethodOnYPointer));
   EXPECT_TRUE(
-    notMatches("class Y { public: void x(); }; void z(Y &y) { y.x(); }",
-               MethodOnYPointer));
+      notMatches("class Y { public: void x(); }; void z(Y &y) { y.x(); }",
+                 MethodOnYPointer));
 }
 
 TEST_P(ASTMatchersTest, LambdaExpr) {
   if (!GetParam().isCXX11OrLater()) {
     return;
   }
-  EXPECT_TRUE(matches("auto f = [] (int i) { return i; };",
-                      lambdaExpr()));
+  EXPECT_TRUE(matches("auto f = [] (int i) { return i; };", lambdaExpr()));
 }
 
 TEST_P(ASTMatchersTest, CXXForRangeStmt) {
@@ -378,7 +371,7 @@ TEST_P(ASTMatchersTest, CXXForRangeStmt_CXX11) {
     return;
   }
   EXPECT_TRUE(matches("int as[] = { 1, 2, 3 };"
-                        "void f() { for (auto &a : as); }",
+                      "void f() { for (auto &a : as); }",
                       cxxForRangeStmt()));
 }
 
@@ -387,15 +380,13 @@ TEST_P(ASTMatchersTest, SubstNonTypeTemplateParmExpr) {
     return;
   }
   EXPECT_FALSE(matches("template\n"
-                         "struct A {  static const int n = 0; };\n"
-                         "struct B : public A<42> {};",
-                         traverse(TK_AsIs,
-                       substNonTypeTemplateParmExpr())));
+                       "struct A {  static const int n = 0; };\n"
+                       "struct B : public A<42> {};",
+                       traverse(TK_AsIs, substNonTypeTemplateParmExpr())));
   EXPECT_TRUE(matches("template\n"
-                        "struct A {  static const int n = N; };\n"
-                        "struct B : public A<42> {};",
-                         traverse(TK_AsIs,
-                      substNonTypeTemplateParmExpr())));
+                      "struct A {  static const int n = N; };\n"
+                      "struct B : public A<42> {};",
+                      traverse(TK_AsIs, substNonTypeTemplateParmExpr())));
 }
 
 TEST_P(ASTMatchersTest, NonTypeTemplateParmDecl) {
@@ -405,7 +396,7 @@ TEST_P(ASTMatchersTest, NonTypeTemplateParmDecl) {
   EXPECT_TRUE(matches("template  void f();",
                       nonTypeTemplateParmDecl(hasName("N"))));
   EXPECT_TRUE(
-    notMatches("template  void f();", nonTypeTemplateParmDecl()));
+      notMatches("template  void f();", nonTypeTemplateParmDecl()));
 }
 
 TEST_P(ASTMatchersTest, TemplateTypeParmDecl) {
@@ -414,8 +405,7 @@ TEST_P(ASTMatchersTest, TemplateTypeParmDecl) {
   }
   EXPECT_TRUE(matches("template  void f();",
                       templateTypeParmDecl(hasName("T"))));
-  EXPECT_TRUE(
-    notMatches("template  void f();", templateTypeParmDecl()));
+  EXPECT_TRUE(notMatches("template  void f();", templateTypeParmDecl()));
 }
 
 TEST_P(ASTMatchersTest, UserDefinedLiteral) {
@@ -423,9 +413,9 @@ TEST_P(ASTMatchersTest, UserDefinedLiteral) {
     return;
   }
   EXPECT_TRUE(matches("constexpr char operator \"\" _inc (const char i) {"
-                        "  return i + 1;"
-                        "}"
-                        "char c = 'a'_inc;",
+                      "  return i + 1;"
+                      "}"
+                      "char c = 'a'_inc;",
                       userDefinedLiteral()));
 }
 
@@ -434,9 +424,7 @@ TEST_P(ASTMatchersTest, FlowControl) {
   EXPECT_TRUE(matches("void f() { while(1) { continue; } }", continueStmt()));
   EXPECT_TRUE(matches("void f() { goto FOO; FOO: ;}", gotoStmt()));
   EXPECT_TRUE(matches("void f() { goto FOO; FOO: ;}",
-                      labelStmt(
-                        hasDeclaration(
-                          labelDecl(hasName("FOO"))))));
+                      labelStmt(hasDeclaration(labelDecl(hasName("FOO"))))));
   EXPECT_TRUE(matches("void f() { FOO: ; void *ptr = &&FOO; goto *ptr; }",
                       addrLabelExpr()));
   EXPECT_TRUE(matches("void f() { return; }", returnStmt()));
@@ -450,8 +438,9 @@ TEST_P(ASTMatchersTest, CXXOperatorCallExpr) {
   StatementMatcher OpCall = cxxOperatorCallExpr();
   // Unary operator
   EXPECT_TRUE(matches("class Y { }; "
-                        "bool operator!(Y x) { return false; }; "
-                        "Y y; bool c = !y;", OpCall));
+                      "bool operator!(Y x) { return false; }; "
+                      "Y y; bool c = !y;",
+                      OpCall));
   // No match -- special operators like "new", "delete"
   // FIXME: operator new takes size_t, for which we need stddef.h, for which
   // we need to figure out include paths in the test.
@@ -460,12 +449,13 @@ TEST_P(ASTMatchersTest, CXXOperatorCallExpr) {
   //             "void *operator new(size_t size) { return 0; } "
   //             "Y *y = new Y;", OpCall));
   EXPECT_TRUE(notMatches("class Y { }; "
-                           "void operator delete(void *p) { } "
-                           "void a() {Y *y = new Y; delete y;}", OpCall));
+                         "void operator delete(void *p) { } "
+                         "void a() {Y *y = new Y; delete y;}",
+                         OpCall));
   // Binary operator
   EXPECT_TRUE(matches("class Y { }; "
-                        "bool operator&&(Y x, Y y) { return true; }; "
-                        "Y a; Y b; bool c = a && b;",
+                      "bool operator&&(Y x, Y y) { return true; }; "
+                      "Y a; Y b; bool c = a && b;",
                       OpCall));
   // No match -- normal operator, not an overloaded one.
   EXPECT_TRUE(notMatches("bool x = true, y = true; bool t = x && y;", OpCall));
@@ -481,30 +471,25 @@ TEST_P(ASTMatchersTest, ThisPointerType) {
       traverse(ast_type_traits::TK_AsIs,
                cxxMemberCallExpr(thisPointerType(recordDecl(hasName("Y")))));
 
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z() { Y y; y.x(); }",
-            MethodOnY));
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z(Y &y) { y.x(); }",
-            MethodOnY));
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z(Y *&y) { y->x(); }",
-            MethodOnY));
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z(Y y[]) { y->x(); }",
-            MethodOnY));
-  EXPECT_TRUE(
-    matches("class Y { public: void x(); }; void z() { Y *y; y->x(); }",
-            MethodOnY));
-
+  EXPECT_TRUE(matches("class Y { public: void x(); }; void z() { Y y; y.x(); }",
+                      MethodOnY));
+  EXPECT_TRUE(matches("class Y { public: void x(); }; void z(Y &y) { y.x(); }",
+                      MethodOnY));
   EXPECT_TRUE(matches(
-    "class Y {"
-      "  public: virtual void x();"
-      "};"
-      "class X : public Y {"
-      "  public: virtual void x();"
-      "};"
-      "void z() { X *x; x->Y::x(); }", MethodOnY));
+      "class Y { public: void x(); }; void z(Y *&y) { y->x(); }", MethodOnY));
+  EXPECT_TRUE(matches(
+      "class Y { public: void x(); }; void z(Y y[]) { y->x(); }", MethodOnY));
+  EXPECT_TRUE(matches(
+      "class Y { public: void x(); }; void z() { Y *y; y->x(); }", MethodOnY));
+
+  EXPECT_TRUE(matches("class Y {"
+                      "  public: virtual void x();"
+                      "};"
+                      "class X : public Y {"
+                      "  public: virtual void x();"
+                      "};"
+                      "void z() { X *x; x->Y::x(); }",
+                      MethodOnY));
 }
 
 TEST_P(ASTMatchersTest, DeclRefExpr) {
@@ -512,29 +497,27 @@ TEST_P(ASTMatchersTest, DeclRefExpr) {
     // FIXME: Add a test for `declRefExpr()` that does not depend on C++.
     return;
   }
-  StatementMatcher Reference =
-    declRefExpr(to(
-      varDecl(hasInitializer(
-        cxxMemberCallExpr(thisPointerType(recordDecl(hasName("Y"))))))));
+  StatementMatcher Reference = declRefExpr(to(varDecl(hasInitializer(
+      cxxMemberCallExpr(thisPointerType(recordDecl(hasName("Y"))))))));
 
-  EXPECT_TRUE(matches(
-    "class Y {"
-      " public:"
-      "  bool x() const;"
-      "};"
-      "void z(const Y &y) {"
-      "  bool b = y.x();"
-      "  if (b) {}"
-      "}", Reference));
+  EXPECT_TRUE(matches("class Y {"
+                      " public:"
+                      "  bool x() const;"
+                      "};"
+                      "void z(const Y &y) {"
+                      "  bool b = y.x();"
+                      "  if (b) {}"
+                      "}",
+                      Reference));
 
-  EXPECT_TRUE(notMatches(
-    "class Y {"
-      " public:"
-      "  bool x() const;"
-      "};"
-      "void z(const Y &y) {"
-      "  bool b = y.x();"
-      "}", Reference));
+  EXPECT_TRUE(notMatches("class Y {"
+                         " public:"
+                         "  bool x() const;"
+                         "};"
+                         "void z(const Y &y) {"
+                         "  bool b = y.x();"
+                         "}",
+                         Reference));
 }
 
 TEST_P(ASTMatchersTest, CXXMemberCallExpr) {
@@ -542,32 +525,32 @@ TEST_P(ASTMatchersTest, CXXMemberCallExpr) {
     return;
   }
   StatementMatcher CallOnVariableY =
-    cxxMemberCallExpr(on(declRefExpr(to(varDecl(hasName("y"))))));
-
-  EXPECT_TRUE(matches(
-    "class Y { public: void x() { Y y; y.x(); } };", CallOnVariableY));
-  EXPECT_TRUE(matches(
-    "class Y { public: void x() const { Y y; y.x(); } };", CallOnVariableY));
-  EXPECT_TRUE(matches(
-    "class Y { public: void x(); };"
-      "class X : public Y { void z() { X y; y.x(); } };", CallOnVariableY));
-  EXPECT_TRUE(matches(
-    "class Y { public: void x(); };"
-      "class X : public Y { void z() { X *y; y->x(); } };", CallOnVariableY));
+      cxxMemberCallExpr(on(declRefExpr(to(varDecl(hasName("y"))))));
+
+  EXPECT_TRUE(matches("class Y { public: void x() { Y y; y.x(); } };",
+                      CallOnVariableY));
+  EXPECT_TRUE(matches("class Y { public: void x() const { Y y; y.x(); } };",
+                      CallOnVariableY));
+  EXPECT_TRUE(matches("class Y { public: void x(); };"
+                      "class X : public Y { void z() { X y; y.x(); } };",
+                      CallOnVariableY));
+  EXPECT_TRUE(matches("class Y { public: void x(); };"
+                      "class X : public Y { void z() { X *y; y->x(); } };",
+                      CallOnVariableY));
   EXPECT_TRUE(notMatches(
-    "class Y { public: void x(); };"
+      "class Y { public: void x(); };"
       "class X : public Y { void z() { unsigned long y; ((X*)y)->x(); } };",
-    CallOnVariableY));
+      CallOnVariableY));
 }
 
 TEST_P(ASTMatchersTest, UnaryExprOrTypeTraitExpr) {
-  EXPECT_TRUE(matches("void x() { int a = sizeof(a); }",
-                      unaryExprOrTypeTraitExpr()));
+  EXPECT_TRUE(
+      matches("void x() { int a = sizeof(a); }", unaryExprOrTypeTraitExpr()));
 }
 
 TEST_P(ASTMatchersTest, AlignOfExpr) {
-  EXPECT_TRUE(notMatches("void x() { int a = sizeof(a); }",
-                         alignOfExpr(anything())));
+  EXPECT_TRUE(
+      notMatches("void x() { int a = sizeof(a); }", alignOfExpr(anything())));
   // FIXME: Uncomment once alignof is enabled.
   // EXPECT_TRUE(matches("void x() { int a = alignof(a); }",
   //                     unaryExprOrTypeTraitExpr()));
@@ -603,11 +586,10 @@ TEST_P(ASTMatchersTest, MemberExpr_MatchesVariable) {
     return;
   }
   EXPECT_TRUE(
-    matches("class Y { void x() { this->y; } int y; };", memberExpr()));
-  EXPECT_TRUE(
-    matches("class Y { void x() { y; } int y; };", memberExpr()));
+      matches("class Y { void x() { this->y; } int y; };", memberExpr()));
+  EXPECT_TRUE(matches("class Y { void x() { y; } int y; };", memberExpr()));
   EXPECT_TRUE(
-    matches("class Y { void x() { Y y; y.y; } int y; };", memberExpr()));
+      matches("class Y { void x() { Y y; y.y; } int y; };", memberExpr()));
   EXPECT_TRUE(matches("template "
                       "class X : T { void f() { this->T::v; } };",
                       cxxDependentScopeMemberExpr()));
@@ -623,8 +605,8 @@ TEST_P(ASTMatchersTest, MemberExpr_MatchesStaticVariable) {
   }
   EXPECT_TRUE(matches("class Y { void x() { this->y; } static int y; };",
                       memberExpr()));
-  EXPECT_TRUE(notMatches("class Y { void x() { y; } static int y; };",
-                         memberExpr()));
+  EXPECT_TRUE(
+      notMatches("class Y { void x() { y; } static int y; };", memberExpr()));
   EXPECT_TRUE(notMatches("class Y { void x() { Y::y; } static int y; };",
                          memberExpr()));
 }
@@ -658,21 +640,21 @@ TEST_P(ASTMatchersTest, FunctionDecl_CXX) {
   if (!GetParam().hasDelayedTemplateParsing()) {
     // FIXME: Fix this test to work with delayed template parsing.
     // Dependent contexts, but a non-dependent call.
-    EXPECT_TRUE(matches("void f(); template  void g() { f(); }",
-                        CallFunctionF));
     EXPECT_TRUE(
-      matches("void f(); template  struct S { void g() { f(); } };",
-              CallFunctionF));
+        matches("void f(); template  void g() { f(); }", CallFunctionF));
+    EXPECT_TRUE(
+        matches("void f(); template  struct S { void g() { f(); } };",
+                CallFunctionF));
   }
 
   // Depedent calls don't match.
   EXPECT_TRUE(
-    notMatches("void f(int); template  void g(T t) { f(t); }",
-               CallFunctionF));
+      notMatches("void f(int); template  void g(T t) { f(t); }",
+                 CallFunctionF));
   EXPECT_TRUE(
-    notMatches("void f(int);"
+      notMatches("void f(int);"
                  "template  struct S { void g(T t) { f(t); } };",
-               CallFunctionF));
+                 CallFunctionF));
 
   EXPECT_TRUE(matches("void f(...);", functionDecl(isVariadic())));
   EXPECT_TRUE(matches("void f(...);", functionDecl(parameterCountIs(0))));
@@ -692,9 +674,8 @@ TEST_P(ASTMatchersTest,
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(
-    matches("template  void f(T t) {}",
-            functionTemplateDecl(hasName("f"))));
+  EXPECT_TRUE(matches("template  void f(T t) {}",
+                      functionTemplateDecl(hasName("f"))));
 }
 
 TEST_P(ASTMatchersTest, FunctionTemplate_DoesNotMatchFunctionDeclarations) {
@@ -709,12 +690,11 @@ TEST_P(ASTMatchersTest,
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(
-    notMatches("void g(); template  void f(T t) {}"
-                 "template <> void f(int t) { g(); }",
-               functionTemplateDecl(hasName("f"),
-                                    hasDescendant(declRefExpr(to(
-                                      functionDecl(hasName("g"))))))));
+  EXPECT_TRUE(notMatches(
+      "void g(); template  void f(T t) {}"
+      "template <> void f(int t) { g(); }",
+      functionTemplateDecl(hasName("f"), hasDescendant(declRefExpr(to(
+                                             functionDecl(hasName("g"))))))));
 }
 
 TEST_P(ASTMatchersTest, ClassTemplateSpecializationDecl) {
@@ -722,7 +702,7 @@ TEST_P(ASTMatchersTest, ClassTemplateSpecializationDecl) {
     return;
   }
   EXPECT_TRUE(matches("template struct A {};"
-                        "template<> struct A {};",
+                      "template<> struct A {};",
                       classTemplateSpecializationDecl()));
   EXPECT_TRUE(matches("template struct A {}; A a;",
                       classTemplateSpecializationDecl()));
@@ -756,13 +736,11 @@ TEST_P(ASTMatchersTest, Matcher_ConstructorCall) {
       traverse(ast_type_traits::TK_AsIs, cxxConstructExpr());
 
   EXPECT_TRUE(
-    matches("class X { public: X(); }; void x() { X x; }", Constructor));
-  EXPECT_TRUE(
-    matches("class X { public: X(); }; void x() { X x = X(); }",
-            Constructor));
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { X x = 0; }",
-            Constructor));
+      matches("class X { public: X(); }; void x() { X x; }", Constructor));
+  EXPECT_TRUE(matches("class X { public: X(); }; void x() { X x = X(); }",
+                      Constructor));
+  EXPECT_TRUE(matches("class X { public: X(int); }; void x() { X x = 0; }",
+                      Constructor));
   EXPECT_TRUE(matches("class X {}; void x(int) { X x; }", Constructor));
 }
 
@@ -779,9 +757,9 @@ TEST_P(ASTMatchersTest, Matcher_ThisExpr) {
     return;
   }
   EXPECT_TRUE(
-    matches("struct X { int a; int f () { return a; } };", cxxThisExpr()));
+      matches("struct X { int a; int f () { return a; } };", cxxThisExpr()));
   EXPECT_TRUE(
-    notMatches("struct X { int f () { int a; return a; } };", cxxThisExpr()));
+      notMatches("struct X { int f () { int a; return a; } };", cxxThisExpr()));
 }
 
 TEST_P(ASTMatchersTest, Matcher_BindTemporaryExpression) {
@@ -794,30 +772,27 @@ TEST_P(ASTMatchersTest, Matcher_BindTemporaryExpression) {
 
   StringRef ClassString = "class string { public: string(); ~string(); }; ";
 
-  EXPECT_TRUE(
-    matches(ClassString +
-              "string GetStringByValue();"
-                "void FunctionTakesString(string s);"
-                "void run() { FunctionTakesString(GetStringByValue()); }",
-            TempExpression));
+  EXPECT_TRUE(matches(
+      ClassString + "string GetStringByValue();"
+                    "void FunctionTakesString(string s);"
+                    "void run() { FunctionTakesString(GetStringByValue()); }",
+      TempExpression));
 
-  EXPECT_TRUE(
-    notMatches(ClassString +
-                 "string* GetStringPointer(); "
-                   "void FunctionTakesStringPtr(string* s);"
-                   "void run() {"
-                   "  string* s = GetStringPointer();"
-                   "  FunctionTakesStringPtr(GetStringPointer());"
-                   "  FunctionTakesStringPtr(s);"
-                   "}",
-               TempExpression));
+  EXPECT_TRUE(notMatches(ClassString +
+                             "string* GetStringPointer(); "
+                             "void FunctionTakesStringPtr(string* s);"
+                             "void run() {"
+                             "  string* s = GetStringPointer();"
+                             "  FunctionTakesStringPtr(GetStringPointer());"
+                             "  FunctionTakesStringPtr(s);"
+                             "}",
+                         TempExpression));
 
-  EXPECT_TRUE(
-    notMatches("class no_dtor {};"
-                 "no_dtor GetObjByValue();"
-                 "void ConsumeObj(no_dtor param);"
-                 "void run() { ConsumeObj(GetObjByValue()); }",
-               TempExpression));
+  EXPECT_TRUE(notMatches("class no_dtor {};"
+                         "no_dtor GetObjByValue();"
+                         "void ConsumeObj(no_dtor param);"
+                         "void run() { ConsumeObj(GetObjByValue()); }",
+                         TempExpression));
 }
 
 TEST_P(ASTMatchersTest, MaterializeTemporaryExpr_MatchesTemporaryCXX11CXX14) {
@@ -872,10 +847,9 @@ TEST_P(ASTMatchersTest, Matcher_NewExpression) {
   StatementMatcher New = cxxNewExpr();
 
   EXPECT_TRUE(matches("class X { public: X(); }; void x() { new X; }", New));
+  EXPECT_TRUE(matches("class X { public: X(); }; void x() { new X(); }", New));
   EXPECT_TRUE(
-    matches("class X { public: X(); }; void x() { new X(); }", New));
-  EXPECT_TRUE(
-    matches("class X { public: X(int); }; void x() { new X(0); }", New));
+      matches("class X { public: X(int); }; void x() { new X(0); }", New));
   EXPECT_TRUE(matches("class X {}; void x(int) { new X; }", New));
 }
 
@@ -883,8 +857,8 @@ TEST_P(ASTMatchersTest, Matcher_DeleteExpression) {
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(matches("struct A {}; void f(A* a) { delete a; }",
-                      cxxDeleteExpr()));
+  EXPECT_TRUE(
+      matches("struct A {}; void f(A* a) { delete a; }", cxxDeleteExpr()));
 }
 
 TEST_P(ASTMatchersTest, Matcher_NoexceptExpression) {
@@ -907,7 +881,7 @@ TEST_P(ASTMatchersTest, Matcher_DefaultArgument) {
   StatementMatcher Arg = cxxDefaultArgExpr();
   EXPECT_TRUE(matches("void x(int, int = 0) { int y; x(y); }", Arg));
   EXPECT_TRUE(
-    matches("class X { void x(int, int = 0) { int y; x(y); } };", Arg));
+      matches("class X { void x(int, int = 0) { int y; x(y); } };", Arg));
   EXPECT_TRUE(notMatches("void x(int, int = 0) { int y; x(y, 0); }", Arg));
 }
 
@@ -951,7 +925,7 @@ TEST_P(ASTMatchersTest, IntegerLiteral) {
 
   // Non-matching cases (character literals, float and double)
   EXPECT_TRUE(notMatches("int i = L'a';",
-                         HasIntLiteral));  // this is actually a character
+                         HasIntLiteral)); // this is actually a character
   // literal cast to int
   EXPECT_TRUE(notMatches("int i = 'a';", HasIntLiteral));
   EXPECT_TRUE(notMatches("int i = 1e10;", HasIntLiteral));
@@ -974,13 +948,13 @@ TEST_P(ASTMatchersTest, FloatLiteral) {
   EXPECT_TRUE(matches("double i = 5.0;", floatLiteral(equals(5.0))));
   EXPECT_TRUE(matches("double i = 5.0;", floatLiteral(equals(5.0f))));
   EXPECT_TRUE(
-    matches("double i = 5.0;", floatLiteral(equals(llvm::APFloat(5.0)))));
+      matches("double i = 5.0;", floatLiteral(equals(llvm::APFloat(5.0)))));
 
   EXPECT_TRUE(notMatches("float i = 10;", HasFloatLiteral));
   EXPECT_TRUE(notMatches("double i = 5.0;", floatLiteral(equals(6.0))));
   EXPECT_TRUE(notMatches("double i = 5.0;", floatLiteral(equals(6.0f))));
   EXPECT_TRUE(
-    notMatches("double i = 5.0;", floatLiteral(equals(llvm::APFloat(6.0)))));
+      notMatches("double i = 5.0;", floatLiteral(equals(llvm::APFloat(6.0)))));
 }
 
 TEST_P(ASTMatchersTest, CXXNullPtrLiteralExpr) {
@@ -1051,9 +1025,9 @@ TEST_P(ASTMatchersTest, ParenListExpr) {
     return;
   }
   EXPECT_TRUE(
-    matches("template class foo { void bar() { foo X(*this); } };"
+      matches("template class foo { void bar() { foo X(*this); } };"
               "template class foo;",
-            varDecl(hasInitializer(parenListExpr(has(unaryOperator()))))));
+              varDecl(hasInitializer(parenListExpr(has(unaryOperator()))))));
 }
 
 TEST_P(ASTMatchersTest, StmtExpr) {
@@ -1064,9 +1038,8 @@ TEST_P(ASTMatchersTest, StmtExpr) {
 TEST_P(ASTMatchersTest, PredefinedExpr) {
   // __func__ expands as StringLiteral("foo")
   EXPECT_TRUE(matches("void foo() { __func__; }",
-                      predefinedExpr(
-                        hasType(asString("const char [4]")),
-                        has(stringLiteral()))));
+                      predefinedExpr(hasType(asString("const char [4]")),
+                                     has(stringLiteral()))));
 }
 
 TEST_P(ASTMatchersTest, AsmStatement) {
@@ -1080,7 +1053,7 @@ TEST_P(ASTMatchersTest, HasCondition) {
   }
 
   StatementMatcher Condition =
-    ifStmt(hasCondition(cxxBoolLiteral(equals(true))));
+      ifStmt(hasCondition(cxxBoolLiteral(equals(true))));
 
   EXPECT_TRUE(matches("void x() { if (true) {} }", Condition));
   EXPECT_TRUE(notMatches("void x() { if (false) {} }", Condition));
@@ -1096,24 +1069,24 @@ TEST_P(ASTMatchersTest, ConditionalOperator) {
     return;
   }
 
-  StatementMatcher Conditional = conditionalOperator(
-    hasCondition(cxxBoolLiteral(equals(true))),
-    hasTrueExpression(cxxBoolLiteral(equals(false))));
+  StatementMatcher Conditional =
+      conditionalOperator(hasCondition(cxxBoolLiteral(equals(true))),
+                          hasTrueExpression(cxxBoolLiteral(equals(false))));
 
   EXPECT_TRUE(matches("void x() { true ? false : true; }", Conditional));
   EXPECT_TRUE(notMatches("void x() { false ? false : true; }", Conditional));
   EXPECT_TRUE(notMatches("void x() { true ? true : false; }", Conditional));
 
-  StatementMatcher ConditionalFalse = conditionalOperator(
-    hasFalseExpression(cxxBoolLiteral(equals(false))));
+  StatementMatcher ConditionalFalse =
+      conditionalOperator(hasFalseExpression(cxxBoolLiteral(equals(false))));
 
   EXPECT_TRUE(matches("void x() { true ? true : false; }", ConditionalFalse));
   EXPECT_TRUE(
-    notMatches("void x() { true ? false : true; }", ConditionalFalse));
+      notMatches("void x() { true ? false : true; }", ConditionalFalse));
 
   EXPECT_TRUE(matches("void x() { true ? true : false; }", ConditionalFalse));
   EXPECT_TRUE(
-    notMatches("void x() { true ? false : true; }", ConditionalFalse));
+      notMatches("void x() { true ? false : true; }", ConditionalFalse));
 }
 
 TEST_P(ASTMatchersTest, BinaryConditionalOperator) {
@@ -1132,18 +1105,17 @@ TEST_P(ASTMatchersTest, BinaryConditionalOperator) {
   EXPECT_TRUE(matches("void x() { 1 ?: 0; }", AlwaysOne));
 
   StatementMatcher FourNotFive = binaryConditionalOperator(
-    hasTrueExpression(opaqueValueExpr(
-      hasSourceExpression((integerLiteral(equals(4)))))),
-    hasFalseExpression(integerLiteral(equals(5))));
+      hasTrueExpression(
+          opaqueValueExpr(hasSourceExpression((integerLiteral(equals(4)))))),
+      hasFalseExpression(integerLiteral(equals(5))));
 
   EXPECT_TRUE(matches("void x() { 4 ?: 5; }", FourNotFive));
 }
 
 TEST_P(ASTMatchersTest, ArraySubscriptExpr) {
-  EXPECT_TRUE(matches("int i[2]; void f() { i[1] = 1; }",
-                      arraySubscriptExpr()));
-  EXPECT_TRUE(notMatches("int i; void f() { i = 1; }",
-                         arraySubscriptExpr()));
+  EXPECT_TRUE(
+      matches("int i[2]; void f() { i[1] = 1; }", arraySubscriptExpr()));
+  EXPECT_TRUE(notMatches("int i; void f() { i = 1; }", arraySubscriptExpr()));
 }
 
 TEST_P(ASTMatchersTest, ForStmt) {
@@ -1178,10 +1150,9 @@ TEST_P(ASTMatchersTest, CompoundStatement_DoesNotMatchEmptyStruct) {
   }
   // It's not a compound statement just because there's "{}" in the source
   // text. This is an AST search, not grep.
-  EXPECT_TRUE(notMatches("namespace n { struct S {}; }",
-                         compoundStmt()));
-  EXPECT_TRUE(matches("namespace n { struct S { void f() {{}} }; }",
-                      compoundStmt()));
+  EXPECT_TRUE(notMatches("namespace n { struct S {}; }", compoundStmt()));
+  EXPECT_TRUE(
+      matches("namespace n { struct S { void f() {{}} }; }", compoundStmt()));
 }
 
 TEST_P(ASTMatchersTest, CastExpr_MatchesExplicitCasts) {
@@ -1242,8 +1213,8 @@ TEST_P(ASTMatchersTest, CXXReinterpretCastExpr_DoesNotMatchOtherCasts) {
   EXPECT_TRUE(notMatches("void* p = static_cast(&p);",
                          cxxReinterpretCastExpr()));
   EXPECT_TRUE(notMatches("struct B { virtual ~B() {} }; struct D : B {};"
-                           "B b;"
-                           "D* p = dynamic_cast(&b);",
+                         "B b;"
+                         "D* p = dynamic_cast(&b);",
                          cxxReinterpretCastExpr()));
 }
 
@@ -1262,11 +1233,10 @@ TEST_P(ASTMatchersTest, CXXFunctionalCastExpr_DoesNotMatchOtherCasts) {
   }
   StringRef FooClass = "class Foo { public: Foo(const char*); };";
   EXPECT_TRUE(
-    notMatches(FooClass + "void r() { Foo f = (Foo) \"hello world\"; }",
-               cxxFunctionalCastExpr()));
-  EXPECT_TRUE(
-    notMatches(FooClass + "void r() { Foo f = \"hello world\"; }",
-               cxxFunctionalCastExpr()));
+      notMatches(FooClass + "void r() { Foo f = (Foo) \"hello world\"; }",
+                 cxxFunctionalCastExpr()));
+  EXPECT_TRUE(notMatches(FooClass + "void r() { Foo f = \"hello world\"; }",
+                         cxxFunctionalCastExpr()));
 }
 
 TEST_P(ASTMatchersTest, CXXDynamicCastExpr) {
@@ -1274,8 +1244,8 @@ TEST_P(ASTMatchersTest, CXXDynamicCastExpr) {
     return;
   }
   EXPECT_TRUE(matches("struct B { virtual ~B() {} }; struct D : B {};"
-                        "B b;"
-                        "D* p = dynamic_cast(&b);",
+                      "B b;"
+                      "D* p = dynamic_cast(&b);",
                       cxxDynamicCastExpr()));
 }
 
@@ -1283,8 +1253,7 @@ TEST_P(ASTMatchersTest, CXXStaticCastExpr_MatchesSimpleCase) {
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(matches("void* p(static_cast(&p));",
-                      cxxStaticCastExpr()));
+  EXPECT_TRUE(matches("void* p(static_cast(&p));", cxxStaticCastExpr()));
 }
 
 TEST_P(ASTMatchersTest, CXXStaticCastExpr_DoesNotMatchOtherCasts) {
@@ -1292,13 +1261,13 @@ TEST_P(ASTMatchersTest, CXXStaticCastExpr_DoesNotMatchOtherCasts) {
     return;
   }
   EXPECT_TRUE(notMatches("char* p = (char*)(&p);", cxxStaticCastExpr()));
-  EXPECT_TRUE(notMatches("char q, *p = const_cast(&q);",
-                         cxxStaticCastExpr()));
+  EXPECT_TRUE(
+      notMatches("char q, *p = const_cast(&q);", cxxStaticCastExpr()));
   EXPECT_TRUE(notMatches("void* p = reinterpret_cast(&p);",
                          cxxStaticCastExpr()));
   EXPECT_TRUE(notMatches("struct B { virtual ~B() {} }; struct D : B {};"
-                           "B b;"
-                           "D* p = dynamic_cast(&b);",
+                         "B b;"
+                         "D* p = dynamic_cast(&b);",
                          cxxStaticCastExpr()));
 }
 
@@ -1311,11 +1280,11 @@ TEST_P(ASTMatchersTest, CStyleCastExpr_DoesNotMatchOtherCasts) {
     return;
   }
   EXPECT_TRUE(notMatches("char* p = static_cast(0);"
-                           "char q, *r = const_cast(&q);"
-                           "void* s = reinterpret_cast(&s);"
-                           "struct B { virtual ~B() {} }; struct D : B {};"
-                           "B b;"
-                           "D* t = dynamic_cast(&b);",
+                         "char q, *r = const_cast(&q);"
+                         "void* s = reinterpret_cast(&s);"
+                         "struct B { virtual ~B() {} }; struct D : B {};"
+                         "B b;"
+                         "D* t = dynamic_cast(&b);",
                          cStyleCastExpr()));
 }
 
@@ -1335,12 +1304,12 @@ TEST_P(ASTMatchersTest, ImplicitCastExpr_MatchesSimpleCase) {
 }
 
 TEST_P(ASTMatchersTest, ImplicitCastExpr_DoesNotMatchIncorrectly) {
-  // This test verifies that implicitCastExpr() matches exactly when implicit casts
-  // are present, and that it ignores explicit and paren casts.
+  // This test verifies that implicitCastExpr() matches exactly when implicit
+  // casts are present, and that it ignores explicit and paren casts.
 
   // These two test cases have no casts.
-  EXPECT_TRUE(notMatches("int x = 0;",
-                         varDecl(hasInitializer(implicitCastExpr()))));
+  EXPECT_TRUE(
+      notMatches("int x = 0;", varDecl(hasInitializer(implicitCastExpr()))));
   EXPECT_TRUE(
       notMatches("int x = (0);", varDecl(hasInitializer(implicitCastExpr()))));
   EXPECT_TRUE(notMatches("void f() { int x = 0; double d = (double) x; }",
@@ -1393,7 +1362,7 @@ TEST_P(ASTMatchersTest, InitListExpr) {
   EXPECT_TRUE(matches("struct B { int x, y; }; struct B b = { 5, 6 };",
                       initListExpr(hasType(recordDecl(hasName("B"))))));
   EXPECT_TRUE(
-    matches("int i[1] = {42, [0] = 43};", integerLiteral(equals(42))));
+      matches("int i[1] = {42, [0] = 43};", integerLiteral(equals(42))));
 }
 
 TEST_P(ASTMatchersTest, InitListExpr_CXX) {
@@ -1441,8 +1410,7 @@ TEST_P(ASTMatchersTest, UsingDecl_MatchesUsingDeclarations) {
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(matches("namespace X { int x; } using X::x;",
-                      usingDecl()));
+  EXPECT_TRUE(matches("namespace X { int x; } using X::x;", usingDecl()));
 }
 
 TEST_P(ASTMatchersTest, UsingDecl_MatchesShadowUsingDelcarations) {
@@ -1460,7 +1428,7 @@ TEST_P(ASTMatchersTest, UsingDirectiveDecl_MatchesUsingNamespace) {
   EXPECT_TRUE(matches("namespace X { int x; } using namespace X;",
                       usingDirectiveDecl()));
   EXPECT_FALSE(
-    matches("namespace X { int x; } using X::x;", usingDirectiveDecl()));
+      matches("namespace X { int x; } using X::x;", usingDirectiveDecl()));
 }
 
 TEST_P(ASTMatchersTest, WhileStmt) {
@@ -1499,11 +1467,11 @@ TEST_P(ASTMatchersTest, CxxExceptionHandling_SimpleCases) {
   EXPECT_TRUE(matches("void foo() try { } catch(int X) { }", cxxCatchStmt()));
   EXPECT_TRUE(matches("void foo() try { } catch(int X) { }", cxxTryStmt()));
   EXPECT_TRUE(
-    notMatches("void foo() try { } catch(int X) { }", cxxThrowExpr()));
-  EXPECT_TRUE(matches("void foo() try { throw; } catch(int X) { }",
-                      cxxThrowExpr()));
-  EXPECT_TRUE(matches("void foo() try { throw 5;} catch(int X) { }",
-                      cxxThrowExpr()));
+      notMatches("void foo() try { } catch(int X) { }", cxxThrowExpr()));
+  EXPECT_TRUE(
+      matches("void foo() try { throw; } catch(int X) { }", cxxThrowExpr()));
+  EXPECT_TRUE(
+      matches("void foo() try { throw 5;} catch(int X) { }", cxxThrowExpr()));
   EXPECT_TRUE(matches("void foo() try { throw; } catch(...) { }",
                       cxxCatchStmt(isCatchAll())));
   EXPECT_TRUE(notMatches("void foo() try { throw; } catch(int) { }",
@@ -1542,9 +1510,8 @@ TEST_P(ASTMatchersTest, QualType) {
 
 TEST_P(ASTMatchersTest, ConstantArrayType) {
   EXPECT_TRUE(matches("int a[2];", constantArrayType()));
-  EXPECT_TRUE(notMatches(
-    "void f() { int a[] = { 2, 3 }; int b[a[0]]; }",
-    constantArrayType(hasElementType(builtinType()))));
+  EXPECT_TRUE(notMatches("void f() { int a[] = { 2, 3 }; int b[a[0]]; }",
+                         constantArrayType(hasElementType(builtinType()))));
 
   EXPECT_TRUE(matches("int a[42];", constantArrayType(hasSize(42))));
   EXPECT_TRUE(matches("int b[2*21];", constantArrayType(hasSize(42))));
@@ -1555,12 +1522,12 @@ TEST_P(ASTMatchersTest, DependentSizedArrayType) {
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(matches(
-    "template  class array { T data[Size]; };",
-    dependentSizedArrayType()));
-  EXPECT_TRUE(notMatches(
-    "int a[42]; int b[] = { 2, 3 }; void f() { int c[b[0]]; }",
-    dependentSizedArrayType()));
+  EXPECT_TRUE(
+      matches("template  class array { T data[Size]; };",
+              dependentSizedArrayType()));
+  EXPECT_TRUE(
+      notMatches("int a[42]; int b[] = { 2, 3 }; void f() { int c[b[0]]; }",
+                 dependentSizedArrayType()));
 }
 
 TEST_P(ASTMatchersTest, IncompleteArrayType) {
@@ -1575,22 +1542,21 @@ TEST_P(ASTMatchersTest, VariableArrayType) {
   EXPECT_TRUE(matches("void f(int b) { int a[b]; }", variableArrayType()));
   EXPECT_TRUE(notMatches("int a[] = {2, 3}; int b[42];", variableArrayType()));
 
-  EXPECT_TRUE(matches(
-    "void f(int b) { int a[b]; }",
-    variableArrayType(hasSizeExpr(ignoringImpCasts(declRefExpr(to(
-      varDecl(hasName("b")))))))));
+  EXPECT_TRUE(matches("void f(int b) { int a[b]; }",
+                      variableArrayType(hasSizeExpr(ignoringImpCasts(
+                          declRefExpr(to(varDecl(hasName("b")))))))));
 }
 
 TEST_P(ASTMatchersTest, AtomicType) {
   if (llvm::Triple(llvm::sys::getDefaultTargetTriple()).getOS() !=
-    llvm::Triple::Win32) {
+      llvm::Triple::Win32) {
     // FIXME: Make this work for MSVC.
     EXPECT_TRUE(matches("_Atomic(int) i;", atomicType()));
 
-    EXPECT_TRUE(matches("_Atomic(int) i;",
-                        atomicType(hasValueType(isInteger()))));
-    EXPECT_TRUE(notMatches("_Atomic(float) f;",
-                           atomicType(hasValueType(isInteger()))));
+    EXPECT_TRUE(
+        matches("_Atomic(int) i;", atomicType(hasValueType(isInteger()))));
+    EXPECT_TRUE(
+        notMatches("_Atomic(float) f;", atomicType(hasValueType(isInteger()))));
   }
 }
 
@@ -1608,9 +1574,9 @@ TEST_P(ASTMatchersTest, AutoType) {
 
   // FIXME: Matching against the type-as-written can't work here, because the
   //        type as written was not deduced.
-  //EXPECT_TRUE(matches("auto a = 1;",
+  // EXPECT_TRUE(matches("auto a = 1;",
   //                    autoType(hasDeducedType(isInteger()))));
-  //EXPECT_TRUE(notMatches("auto b = 2.0;",
+  // EXPECT_TRUE(notMatches("auto b = 2.0;",
   //                       autoType(hasDeducedType(isInteger()))));
 }
 
@@ -1657,48 +1623,43 @@ TEST_P(ASTMatchersTest, FunctionProtoType_CXX) {
 
 TEST_P(ASTMatchersTest, ParenType) {
   EXPECT_TRUE(
-    matches("int (*array)[4];", varDecl(hasType(pointsTo(parenType())))));
+      matches("int (*array)[4];", varDecl(hasType(pointsTo(parenType())))));
   EXPECT_TRUE(notMatches("int *array[4];", varDecl(hasType(parenType()))));
 
   EXPECT_TRUE(matches(
-    "int (*ptr_to_func)(int);",
-    varDecl(hasType(pointsTo(parenType(innerType(functionType())))))));
+      "int (*ptr_to_func)(int);",
+      varDecl(hasType(pointsTo(parenType(innerType(functionType())))))));
   EXPECT_TRUE(notMatches(
-    "int (*ptr_to_array)[4];",
-    varDecl(hasType(pointsTo(parenType(innerType(functionType())))))));
+      "int (*ptr_to_array)[4];",
+      varDecl(hasType(pointsTo(parenType(innerType(functionType())))))));
 }
 
 TEST_P(ASTMatchersTest, PointerType) {
   // FIXME: Reactive when these tests can be more specific (not matching
   // implicit code on certain platforms), likely when we have hasDescendant for
   // Types/TypeLocs.
-  //EXPECT_TRUE(matchAndVerifyResultTrue(
+  // EXPECT_TRUE(matchAndVerifyResultTrue(
   //    "int* a;",
   //    pointerTypeLoc(pointeeLoc(typeLoc().bind("loc"))),
   //    std::make_unique>("loc", 1)));
-  //EXPECT_TRUE(matchAndVerifyResultTrue(
+  // EXPECT_TRUE(matchAndVerifyResultTrue(
   //    "int* a;",
   //    pointerTypeLoc().bind("loc"),
   //    std::make_unique>("loc", 1)));
-  EXPECT_TRUE(matches(
-    "int** a;",
-    loc(pointerType(pointee(qualType())))));
-  EXPECT_TRUE(matches(
-    "int** a;",
-    loc(pointerType(pointee(pointerType())))));
-  EXPECT_TRUE(matches(
-    "int* b; int* * const a = &b;",
-    loc(qualType(isConstQualified(), pointerType()))));
+  EXPECT_TRUE(matches("int** a;", loc(pointerType(pointee(qualType())))));
+  EXPECT_TRUE(matches("int** a;", loc(pointerType(pointee(pointerType())))));
+  EXPECT_TRUE(matches("int* b; int* * const a = &b;",
+                      loc(qualType(isConstQualified(), pointerType()))));
 
   StringRef Fragment = "int *ptr;";
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("ptr"),
-                                           hasType(blockPointerType()))));
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("ptr"),
-                                           hasType(memberPointerType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("ptr"),
-                                        hasType(pointerType()))));
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("ptr"),
-                                           hasType(referenceType()))));
+  EXPECT_TRUE(notMatches(Fragment,
+                         varDecl(hasName("ptr"), hasType(blockPointerType()))));
+  EXPECT_TRUE(notMatches(
+      Fragment, varDecl(hasName("ptr"), hasType(memberPointerType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("ptr"), hasType(pointerType()))));
+  EXPECT_TRUE(
+      notMatches(Fragment, varDecl(hasName("ptr"), hasType(referenceType()))));
 }
 
 TEST_P(ASTMatchersTest, PointerType_CXX) {
@@ -1763,28 +1724,28 @@ TEST_P(ASTMatchersTest, AutoRefTypes) {
                        "auto &c = a;"
                        "auto &&d = c;"
                        "auto &&e = 2;";
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("a"),
-                                           hasType(referenceType()))));
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("b"),
-                                           hasType(referenceType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("c"),
-                                        hasType(referenceType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("c"),
-                                        hasType(lValueReferenceType()))));
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("c"),
-                                           hasType(rValueReferenceType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("d"),
-                                        hasType(referenceType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("d"),
-                                        hasType(lValueReferenceType()))));
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("d"),
-                                           hasType(rValueReferenceType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("e"),
-                                        hasType(referenceType()))));
-  EXPECT_TRUE(notMatches(Fragment, varDecl(hasName("e"),
-                                           hasType(lValueReferenceType()))));
-  EXPECT_TRUE(matches(Fragment, varDecl(hasName("e"),
-                                        hasType(rValueReferenceType()))));
+  EXPECT_TRUE(
+      notMatches(Fragment, varDecl(hasName("a"), hasType(referenceType()))));
+  EXPECT_TRUE(
+      notMatches(Fragment, varDecl(hasName("b"), hasType(referenceType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("c"), hasType(referenceType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("c"), hasType(lValueReferenceType()))));
+  EXPECT_TRUE(notMatches(
+      Fragment, varDecl(hasName("c"), hasType(rValueReferenceType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("d"), hasType(referenceType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("d"), hasType(lValueReferenceType()))));
+  EXPECT_TRUE(notMatches(
+      Fragment, varDecl(hasName("d"), hasType(rValueReferenceType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("e"), hasType(referenceType()))));
+  EXPECT_TRUE(notMatches(
+      Fragment, varDecl(hasName("e"), hasType(lValueReferenceType()))));
+  EXPECT_TRUE(
+      matches(Fragment, varDecl(hasName("e"), hasType(rValueReferenceType()))));
 }
 
 TEST_P(ASTMatchersTest, EnumType) {
@@ -1796,34 +1757,29 @@ TEST_P(ASTMatchersTest, EnumType_CXX) {
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(matches("enum Color { Green }; Color color;",
-                      loc(enumType())));
+  EXPECT_TRUE(matches("enum Color { Green }; Color color;", loc(enumType())));
 }
 
 TEST_P(ASTMatchersTest, EnumType_CXX11) {
   if (!GetParam().isCXX11OrLater()) {
     return;
   }
-  EXPECT_TRUE(matches("enum class Color { Green }; Color color;",
-                      loc(enumType())));
+  EXPECT_TRUE(
+      matches("enum class Color { Green }; Color color;", loc(enumType())));
 }
 
 TEST_P(ASTMatchersTest, PointerType_MatchesPointersToConstTypes) {
-  EXPECT_TRUE(matches("int b; int * const a = &b;",
-                      loc(pointerType())));
-  EXPECT_TRUE(matches("int b; int * const a = &b;",
-                      loc(pointerType())));
-  EXPECT_TRUE(matches(
-    "int b; const int * a = &b;",
-    loc(pointerType(pointee(builtinType())))));
-  EXPECT_TRUE(matches(
-    "int b; const int * a = &b;",
-    pointerType(pointee(builtinType()))));
+  EXPECT_TRUE(matches("int b; int * const a = &b;", loc(pointerType())));
+  EXPECT_TRUE(matches("int b; int * const a = &b;", loc(pointerType())));
+  EXPECT_TRUE(matches("int b; const int * a = &b;",
+                      loc(pointerType(pointee(builtinType())))));
+  EXPECT_TRUE(matches("int b; const int * a = &b;",
+                      pointerType(pointee(builtinType()))));
 }
 
 TEST_P(ASTMatchersTest, TypedefType) {
-  EXPECT_TRUE(matches("typedef int X; X a;", varDecl(hasName("a"),
-                                                     hasType(typedefType()))));
+  EXPECT_TRUE(matches("typedef int X; X a;",
+                      varDecl(hasName("a"), hasType(typedefType()))));
 }
 
 TEST_P(ASTMatchersTest, TemplateSpecializationType) {
@@ -1864,13 +1820,13 @@ TEST_P(ASTMatchersTest, ElaboratedType) {
     // FIXME: Add a test for `elaboratedType()` that does not depend on C++.
     return;
   }
-  EXPECT_TRUE(matches(
-    "namespace N {"
-      "  namespace M {"
-      "    class D {};"
-      "  }"
-      "}"
-      "N::M::D d;", elaboratedType()));
+  EXPECT_TRUE(matches("namespace N {"
+                      "  namespace M {"
+                      "    class D {};"
+                      "  }"
+                      "}"
+                      "N::M::D d;",
+                      elaboratedType()));
   EXPECT_TRUE(matches("class C {} c;", elaboratedType()));
   EXPECT_TRUE(notMatches("class C {}; C c;", elaboratedType()));
 }
@@ -1885,30 +1841,29 @@ TEST_P(ASTMatchersTest, SubstTemplateTypeParmType) {
                    "}"
                    "int i = F();";
   EXPECT_FALSE(matches(code, binaryOperator(hasLHS(
-    expr(hasType(substTemplateTypeParmType()))))));
+                                 expr(hasType(substTemplateTypeParmType()))))));
   EXPECT_TRUE(matches(code, binaryOperator(hasRHS(
-    expr(hasType(substTemplateTypeParmType()))))));
+                                expr(hasType(substTemplateTypeParmType()))))));
 }
 
 TEST_P(ASTMatchersTest, NestedNameSpecifier) {
   if (!GetParam().isCXX()) {
     return;
   }
-  EXPECT_TRUE(matches("namespace ns { struct A {}; } ns::A a;",
-                      nestedNameSpecifier()));
+  EXPECT_TRUE(
+      matches("namespace ns { struct A {}; } ns::A a;", nestedNameSpecifier()));
   EXPECT_TRUE(matches("template  class A { typename T::B b; };",
                       nestedNameSpecifier()));
-  EXPECT_TRUE(matches("struct A { void f(); }; void A::f() {}",
-                      nestedNameSpecifier()));
+  EXPECT_TRUE(
+      matches("struct A { void f(); }; void A::f() {}", nestedNameSpecifier()));
   EXPECT_TRUE(matches("namespace a { namespace b {} } namespace ab = a::b;",
                       nestedNameSpecifier()));
 
-  EXPECT_TRUE(matches(
-    "struct A { static void f() {} }; void g() { A::f(); }",
-    nestedNameSpecifier()));
-  EXPECT_TRUE(notMatches(
-    "struct A { static void f() {} }; void g(A* a) { a->f(); }",
-    nestedNameSpecifier()));
+  EXPECT_TRUE(matches("struct A { static void f() {} }; void g() { A::f(); }",
+                      nestedNameSpecifier()));
+  EXPECT_TRUE(
+      notMatches("struct A { static void f() {} }; void g(A* a) { a->f(); }",
+                 nestedNameSpecifier()));
 }
 
 TEST_P(ASTMatchersTest, NullStmt) {
@@ -1929,10 +1884,10 @@ TEST_P(ASTMatchersTest, NestedNameSpecifier_MatchesTypes) {
     return;
   }
   NestedNameSpecifierMatcher Matcher = nestedNameSpecifier(
-    specifiesType(hasDeclaration(recordDecl(hasName("A")))));
+      specifiesType(hasDeclaration(recordDecl(hasName("A")))));
   EXPECT_TRUE(matches("struct A { struct B {}; }; A::B b;", Matcher));
-  EXPECT_TRUE(matches("struct A { struct B { struct C {}; }; }; A::B::C c;",
-                      Matcher));
+  EXPECT_TRUE(
+      matches("struct A { struct B { struct C {}; }; }; A::B::C c;", Matcher));
   EXPECT_TRUE(notMatches("namespace A { struct B {}; } A::B b;", Matcher));
 }
 
@@ -1940,8 +1895,8 @@ TEST_P(ASTMatchersTest, NestedNameSpecifier_MatchesNamespaceDecls) {
   if (!GetParam().isCXX()) {
     return;
   }
-  NestedNameSpecifierMatcher Matcher = nestedNameSpecifier(
-    specifiesNamespace(hasName("ns")));
+  NestedNameSpecifierMatcher Matcher =
+      nestedNameSpecifier(specifiesNamespace(hasName("ns")));
   EXPECT_TRUE(matches("namespace ns { struct A {}; } ns::A a;", Matcher));
   EXPECT_TRUE(notMatches("namespace xx { struct A {}; } xx::A a;", Matcher));
   EXPECT_TRUE(notMatches("struct ns { struct A {}; }; ns::A a;", Matcher));
@@ -1953,16 +1908,15 @@ TEST_P(ASTMatchersTest,
     return;
   }
   EXPECT_TRUE(matches(
-    "struct A { struct B { struct C {}; }; }; A::B::C c;",
-    nestedNameSpecifier(hasPrefix(specifiesType(asString("struct A"))))));
-  EXPECT_TRUE(matches(
-    "struct A { struct B { struct C {}; }; }; A::B::C c;",
-    nestedNameSpecifierLoc(hasPrefix(
-      specifiesTypeLoc(loc(qualType(asString("struct A"))))))));
+      "struct A { struct B { struct C {}; }; }; A::B::C c;",
+      nestedNameSpecifier(hasPrefix(specifiesType(asString("struct A"))))));
+  EXPECT_TRUE(matches("struct A { struct B { struct C {}; }; }; A::B::C c;",
+                      nestedNameSpecifierLoc(hasPrefix(specifiesTypeLoc(
+                          loc(qualType(asString("struct A"))))))));
   EXPECT_TRUE(matches(
-    "namespace N { struct A { struct B { struct C {}; }; }; } N::A::B::C c;",
-    nestedNameSpecifierLoc(hasPrefix(
-      specifiesTypeLoc(loc(qualType(asString("struct N::A"))))))));
+      "namespace N { struct A { struct B { struct C {}; }; }; } N::A::B::C c;",
+      nestedNameSpecifierLoc(hasPrefix(
+          specifiesTypeLoc(loc(qualType(asString("struct N::A"))))))));
 }
 
 template 
@@ -1980,18 +1934,18 @@ class VerifyAncestorHasChildIsEqual : public BoundNodesCallback {
     // to equalsNode.
     const T *TypedNode = cast(Node);
     return selectFirst(
-      "", match(stmt(hasParent(
-        stmt(has(stmt(equalsNode(TypedNode)))).bind(""))),
-                *Node, Context)) != nullptr;
+               "", match(stmt(hasParent(
+                             stmt(has(stmt(equalsNode(TypedNode)))).bind(""))),
+                         *Node, Context)) != nullptr;
   }
   bool verify(const BoundNodes &Nodes, ASTContext &Context, const Decl *Node) {
     // Use the original typed pointer to verify we can pass pointers to subtypes
     // to equalsNode.
     const T *TypedNode = cast(Node);
     return selectFirst(
-      "", match(decl(hasParent(
-        decl(has(decl(equalsNode(TypedNode)))).bind(""))),
-                *Node, Context)) != nullptr;
+               "", match(decl(hasParent(
+                             decl(has(decl(equalsNode(TypedNode)))).bind(""))),
+                         *Node, Context)) != nullptr;
   }
   bool verify(const BoundNodes &Nodes, ASTContext &Context, const Type *Node) {
     // Use the original typed pointer to verify we can pass pointers to subtypes
@@ -1999,9 +1953,9 @@ class VerifyAncestorHasChildIsEqual : public BoundNodesCallback {
     const T *TypedNode = cast(Node);
     const auto *Dec = Nodes.getNodeAs("decl");
     return selectFirst(
-      "", match(fieldDecl(hasParent(decl(has(fieldDecl(
-        hasType(type(equalsNode(TypedNode)).bind(""))))))),
-                *Dec, Context)) != nullptr;
+               "", match(fieldDecl(hasParent(decl(has(fieldDecl(
+                             hasType(type(equalsNode(TypedNode)).bind(""))))))),
+                         *Dec, Context)) != nullptr;
   }
 };
 
@@ -2100,43 +2054,31 @@ TEST(ASTMatchersTestObjC, ObjCMessageExpr) {
                           "  Str *up = [text uppercaseString];"
                           "} "
                           "@end ";
-  EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(anything())));
+  EXPECT_TRUE(matchesObjC(Objc1String, objcMessageExpr(anything())));
   EXPECT_TRUE(matchesObjC(Objc1String,
-                          objcMessageExpr(hasAnySelector({
-                                          "contents", "meth:"}))
+                          objcMessageExpr(hasAnySelector({"contents", "meth:"}))
 
-                         ));
-  EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(hasSelector("contents"))));
-  EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(hasAnySelector("contents", "contentsA"))));
-  EXPECT_FALSE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(hasAnySelector("contentsB", "contentsC"))));
+                              ));
+  EXPECT_TRUE(
+      matchesObjC(Objc1String, objcMessageExpr(hasSelector("contents"))));
   EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(matchesSelector("cont*"))));
+      Objc1String, objcMessageExpr(hasAnySelector("contents", "contentsA"))));
   EXPECT_FALSE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(matchesSelector("?cont*"))));
-  EXPECT_TRUE(notMatchesObjC(
-    Objc1String,
-    objcMessageExpr(hasSelector("contents"), hasNullSelector())));
-  EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(hasSelector("contents"), hasUnarySelector())));
-  EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(hasSelector("contents"), numSelectorArgs(0))));
-  EXPECT_TRUE(matchesObjC(
-    Objc1String,
-    objcMessageExpr(matchesSelector("uppercase*"),
-                    argumentCountIs(0)
-    )));
+      Objc1String, objcMessageExpr(hasAnySelector("contentsB", "contentsC"))));
+  EXPECT_TRUE(
+      matchesObjC(Objc1String, objcMessageExpr(matchesSelector("cont*"))));
+  EXPECT_FALSE(
+      matchesObjC(Objc1String, objcMessageExpr(matchesSelector("?cont*"))));
+  EXPECT_TRUE(
+      notMatchesObjC(Objc1String, objcMessageExpr(hasSelector("contents"),
+                                                  hasNullSelector())));
+  EXPECT_TRUE(matchesObjC(Objc1String, objcMessageExpr(hasSelector("contents"),
+                                                       hasUnarySelector())));
+  EXPECT_TRUE(matchesObjC(Objc1String, objcMessageExpr(hasSelector("contents"),
+                                                       numSelectorArgs(0))));
+  EXPECT_TRUE(
+      matchesObjC(Objc1String, objcMessageExpr(matchesSelector("uppercase*"),
+                                               argumentCountIs(0))));
 }
 
 TEST(ASTMatchersTestObjC, ObjCDecls) {
@@ -2157,33 +2099,17 @@ TEST(ASTMatchersTestObjC, ObjCDecls) {
                          "- (void)abc_doThing {} "
                          "@end ";
 
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcProtocolDecl(hasName("Proto"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcImplementationDecl(hasName("Thing"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcCategoryDecl(hasName("ABC"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcCategoryImplDecl(hasName("ABC"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcMethodDecl(hasName("protoDidThing"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcMethodDecl(hasName("abc_doThing"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcMethodDecl(hasName("anything"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcIvarDecl(hasName("_ivar"))));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcPropertyDecl(hasName("enabled"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcProtocolDecl(hasName("Proto"))));
+  EXPECT_TRUE(
+      matchesObjC(ObjCString, objcImplementationDecl(hasName("Thing"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcCategoryDecl(hasName("ABC"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcCategoryImplDecl(hasName("ABC"))));
+  EXPECT_TRUE(
+      matchesObjC(ObjCString, objcMethodDecl(hasName("protoDidThing"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcMethodDecl(hasName("abc_doThing"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcMethodDecl(hasName("anything"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcIvarDecl(hasName("_ivar"))));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcPropertyDecl(hasName("enabled"))));
 }
 
 TEST(ASTMatchersTestObjC, ObjCExceptionStmts) {
@@ -2194,18 +2120,10 @@ TEST(ASTMatchersTestObjC, ObjCExceptionStmts) {
                          "  } @finally {}"
                          "}";
 
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcTryStmt()));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcThrowStmt()));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcCatchStmt()));
-  EXPECT_TRUE(matchesObjC(
-    ObjCString,
-    objcFinallyStmt()));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcTryStmt()));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcThrowStmt()));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcCatchStmt()));
+  EXPECT_TRUE(matchesObjC(ObjCString, objcFinallyStmt()));
 }
 
 TEST(ASTMatchersTestObjC, ObjCAutoreleasePoolStmt) {
@@ -2274,11 +2192,18 @@ void x() {
   EXPECT_TRUE(matchesWithOpenMP(Source3, Matcher));
 
   StringRef Source4 = R"(
+void x() {
+#pragma omp parallel default(firstprivate)
+;
+})";
+  EXPECT_TRUE(matchesWithOpenMP51(Source4, Matcher));
+
+  StringRef Source5 = R"(
 void x(int x) {
 #pragma omp parallel num_threads(x)
 ;
 })";
-  EXPECT_TRUE(notMatchesWithOpenMP(Source4, Matcher));
+  EXPECT_TRUE(notMatchesWithOpenMP(Source5, Matcher));
 }
 
 TEST(ASTMatchersTest, Finder_DynamicOnlyAcceptsSomeMatchers) {
diff --git a/clang/unittests/ASTMatchers/ASTMatchersTest.h b/clang/unittests/ASTMatchers/ASTMatchersTest.h
index 8669ebd552c83..bde6297f82ddc 100644
--- a/clang/unittests/ASTMatchers/ASTMatchersTest.h
+++ b/clang/unittests/ASTMatchers/ASTMatchersTest.h
@@ -20,10 +20,10 @@ namespace clang {
 namespace ast_matchers {
 
 using clang::tooling::buildASTFromCodeWithArgs;
+using clang::tooling::FileContentMappings;
+using clang::tooling::FrontendActionFactory;
 using clang::tooling::newFrontendActionFactory;
 using clang::tooling::runToolOnCodeWithArgs;
-using clang::tooling::FrontendActionFactory;
-using clang::tooling::FileContentMappings;
 
 class BoundNodesCallback {
 public:
@@ -38,7 +38,8 @@ class BoundNodesCallback {
 // If 'FindResultVerifier' is NULL, sets *Verified to true when Run is called.
 class VerifyMatch : public MatchFinder::MatchCallback {
 public:
-  VerifyMatch(std::unique_ptr FindResultVerifier, bool *Verified)
+  VerifyMatch(std::unique_ptr FindResultVerifier,
+              bool *Verified)
       : Verified(Verified), FindResultReviewer(std::move(FindResultVerifier)) {}
 
   void run(const MatchFinder::MatchResult &Result) override {
@@ -124,17 +125,16 @@ testing::AssertionResult matchesConditionally(
     return testing::AssertionFailure() << "Parsing error in \"" << Code << "\"";
   }
   if (Found != DynamicFound) {
-    return testing::AssertionFailure() << "Dynamic match result ("
-                                       << DynamicFound
-                                       << ") does not match static result ("
-                                       << Found << ")";
+    return testing::AssertionFailure()
+           << "Dynamic match result (" << DynamicFound
+           << ") does not match static result (" << Found << ")";
   }
   if (!Found && ExpectMatch) {
     return testing::AssertionFailure()
-      << "Could not find match in \"" << Code << "\"";
+           << "Could not find match in \"" << Code << "\"";
   } else if (Found && !ExpectMatch) {
     return testing::AssertionFailure()
-      << "Found unexpected match in \"" << Code << "\"";
+           << "Found unexpected match in \"" << Code << "\"";
   }
   return testing::AssertionSuccess();
 }
@@ -216,7 +216,8 @@ matchesConditionallyWithCuda(const Twine &Code, const T &AMatcher,
       "                      size_t sharedSize = 0,"
       "                      cudaStream_t stream = 0);"
       "extern \"C\" unsigned __cudaPushCallConfiguration("
-      "    dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = 0);";
+      "    dim3 gridDim, dim3 blockDim, size_t sharedMem = 0, void *stream = "
+      "0);";
 
   bool Found = false, DynamicFound = false;
   MatchFinder Finder;
@@ -233,22 +234,20 @@ matchesConditionallyWithCuda(const Twine &Code, const T &AMatcher,
   std::vector Args = {
       "-xcuda",  "-fno-ms-extensions",     "--cuda-host-only",     "-nocudainc",
       "-target", "x86_64-unknown-unknown", std::string(CompileArg)};
-  if (!runToolOnCodeWithArgs(Factory->create(),
-                             CudaHeader + Code, Args)) {
+  if (!runToolOnCodeWithArgs(Factory->create(), CudaHeader + Code, Args)) {
     return testing::AssertionFailure() << "Parsing error in \"" << Code << "\"";
   }
   if (Found != DynamicFound) {
-    return testing::AssertionFailure() << "Dynamic match result ("
-                                       << DynamicFound
-                                       << ") does not match static result ("
-                                       << Found << ")";
+    return testing::AssertionFailure()
+           << "Dynamic match result (" << DynamicFound
+           << ") does not match static result (" << Found << ")";
   }
   if (!Found && ExpectMatch) {
     return testing::AssertionFailure()
-      << "Could not find match in \"" << Code << "\"";
+           << "Could not find match in \"" << Code << "\"";
   } else if (Found && !ExpectMatch) {
     return testing::AssertionFailure()
-      << "Found unexpected match in \"" << Code << "\"";
+           << "Found unexpected match in \"" << Code << "\"";
   }
   return testing::AssertionSuccess();
 }
@@ -276,13 +275,28 @@ testing::AssertionResult notMatchesWithOpenMP(const Twine &Code,
   return matchesConditionally(Code, AMatcher, false, {"-fopenmp=libomp"});
 }
 
+template 
+testing::AssertionResult matchesWithOpenMP51(const Twine &Code,
+                                             const T &AMatcher) {
+  return matchesConditionally(Code, AMatcher, true,
+                              {"-fopenmp=libomp", "-fopenmp-version=51"});
+}
+
+template 
+testing::AssertionResult notMatchesWithOpenMP51(const Twine &Code,
+                                                const T &AMatcher) {
+  return matchesConditionally(Code, AMatcher, false,
+                              {"-fopenmp=libomp", "-fopenmp-version=51"});
+}
+
 template 
 testing::AssertionResult matchAndVerifyResultConditionally(
     const Twine &Code, const T &AMatcher,
     std::unique_ptr FindResultVerifier, bool ExpectResult) {
   bool VerifiedResult = false;
   MatchFinder Finder;
-  VerifyMatch VerifyVerifiedResult(std::move(FindResultVerifier), &VerifiedResult);
+  VerifyMatch VerifyVerifiedResult(std::move(FindResultVerifier),
+                                   &VerifiedResult);
   Finder.addMatcher(AMatcher, &VerifyVerifiedResult);
   std::unique_ptr Factory(
       newFrontendActionFactory(&Finder));
@@ -296,10 +310,10 @@ testing::AssertionResult matchAndVerifyResultConditionally(
   }
   if (!VerifiedResult && ExpectResult) {
     return testing::AssertionFailure()
-      << "Could not verify result in \"" << Code << "\"";
+           << "Could not verify result in \"" << Code << "\"";
   } else if (VerifiedResult && !ExpectResult) {
     return testing::AssertionFailure()
-      << "Verified unexpected result in \"" << Code << "\"";
+           << "Verified unexpected result in \"" << Code << "\"";
   }
 
   VerifiedResult = false;
@@ -307,15 +321,15 @@ testing::AssertionResult matchAndVerifyResultConditionally(
   std::unique_ptr AST(
       buildASTFromCodeWithArgs(Code.toStringRef(Buffer), Args));
   if (!AST.get())
-    return testing::AssertionFailure() << "Parsing error in \"" << Code
-                                       << "\" while building AST";
+    return testing::AssertionFailure()
+           << "Parsing error in \"" << Code << "\" while building AST";
   Finder.matchAST(AST->getASTContext());
   if (!VerifiedResult && ExpectResult) {
     return testing::AssertionFailure()
-      << "Could not verify result in \"" << Code << "\" with AST";
+           << "Could not verify result in \"" << Code << "\" with AST";
   } else if (VerifiedResult && !ExpectResult) {
     return testing::AssertionFailure()
-      << "Verified unexpected result in \"" << Code << "\" with AST";
+           << "Verified unexpected result in \"" << Code << "\" with AST";
   }
 
   return testing::AssertionSuccess();
@@ -327,8 +341,8 @@ template 
 testing::AssertionResult matchAndVerifyResultTrue(
     const Twine &Code, const T &AMatcher,
     std::unique_ptr FindResultVerifier) {
-  return matchAndVerifyResultConditionally(
-      Code, AMatcher, std::move(FindResultVerifier), true);
+  return matchAndVerifyResultConditionally(Code, AMatcher,
+                                           std::move(FindResultVerifier), true);
 }
 
 template 
@@ -342,8 +356,7 @@ testing::AssertionResult matchAndVerifyResultFalse(
 // Implements a run method that returns whether BoundNodes contains a
 // Decl bound to Id that can be dynamically cast to T.
 // Optionally checks that the check succeeded a specific number of times.
-template 
-class VerifyIdIsBoundTo : public BoundNodesCallback {
+template  class VerifyIdIsBoundTo : public BoundNodesCallback {
 public:
   // Create an object that checks that a node of type \c T was bound to \c Id.
   // Does not check for a certain number of matches.
@@ -386,7 +399,7 @@ class VerifyIdIsBoundTo : public BoundNodesCallback {
       if (const NamedDecl *Named = Nodes->getNodeAs(Id)) {
         Name = Named->getNameAsString();
       } else if (const NestedNameSpecifier *NNS =
-        Nodes->getNodeAs(Id)) {
+                     Nodes->getNodeAs(Id)) {
         llvm::raw_string_ostream OS(Name);
         NNS->print(OS, PrintingPolicy(LangOptions()));
       }
@@ -398,7 +411,7 @@ class VerifyIdIsBoundTo : public BoundNodesCallback {
       return true;
     }
     EXPECT_TRUE(M.count(Id) == 0 ||
-      M.find(Id)->second.template get() == nullptr);
+                M.find(Id)->second.template get() == nullptr);
     return false;
   }
 
@@ -437,4 +450,4 @@ class ASTMatchersTest : public ::testing::Test,
 } // namespace ast_matchers
 } // namespace clang
 
-#endif  // LLVM_CLANG_UNITTESTS_AST_MATCHERS_AST_MATCHERS_TEST_H
+#endif // LLVM_CLANG_UNITTESTS_AST_MATCHERS_AST_MATCHERS_TEST_H
diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
index bf799a781ae17..93ea63c1c2e60 100644
--- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
+++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def
@@ -982,6 +982,7 @@ __OMP_CANCEL_KIND(taskgroup, 4)
 
 __OMP_DEFAULT_KIND(none)
 __OMP_DEFAULT_KIND(shared)
+__OMP_DEFAULT_KIND(firstprivate)
 __OMP_DEFAULT_KIND(unknown)
 
 #undef __OMP_DEFAULT_KIND

From 4d5fd0ee5ebda8979a448f5de397e3f1321b1ca8 Mon Sep 17 00:00:00 2001
From: Fangrui Song 
Date: Sun, 12 Jul 2020 21:04:31 -0700
Subject: [PATCH 065/771] [MC][RISCV] Set UseIntegratedAssembler to true

to align with most other targets. Also, -fintegrated-as is the default
for clang -target riscv*.
---
 llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp | 1 -
 llvm/test/CodeGen/RISCV/branch-relaxation.ll          | 4 ++--
 llvm/test/CodeGen/RISCV/inline-asm-abi-names.ll       | 4 ++--
 llvm/test/CodeGen/RISCV/inline-asm.ll                 | 4 ++--
 llvm/test/CodeGen/RISCV/large-stack.ll                | 4 ++++
 5 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
index 8db1738566ac8..089a2def4c210 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMCAsmInfo.cpp
@@ -27,7 +27,6 @@ RISCVMCAsmInfo::RISCVMCAsmInfo(const Triple &TT) {
   ExceptionsType = ExceptionHandling::DwarfCFI;
   Data16bitsDirective = "\t.half\t";
   Data32bitsDirective = "\t.word\t";
-  UseIntegratedAssembler = false;
 }
 
 const MCExpr *RISCVMCAsmInfo::getExprForFDESymbol(const MCSymbol *Sym,
diff --git a/llvm/test/CodeGen/RISCV/branch-relaxation.ll b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
index 56f0f27a06488..3d617bf0b26b4 100644
--- a/llvm/test/CodeGen/RISCV/branch-relaxation.ll
+++ b/llvm/test/CodeGen/RISCV/branch-relaxation.ll
@@ -11,7 +11,7 @@ define void @relax_bcc(i1 %a) nounwind {
 ; CHECK-NEXT:    j .LBB0_2
 ; CHECK-NEXT:  .LBB0_1: # %iftrue
 ; CHECK-NEXT:    #APP
-; CHECK-NEXT:    .space 4096
+; CHECK-NEXT:    .zero 4096
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:  .LBB0_2: # %tail
 ; CHECK-NEXT:    ret
@@ -38,7 +38,7 @@ define i32 @relax_jal(i1 %a) nounwind {
 ; CHECK-NEXT:    #APP
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    #APP
-; CHECK-NEXT:    .space 1048576
+; CHECK-NEXT:    .zero 1048576
 ; CHECK-NEXT:    #NO_APP
 ; CHECK-NEXT:    addi a0, zero, 1
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/RISCV/inline-asm-abi-names.ll b/llvm/test/CodeGen/RISCV/inline-asm-abi-names.ll
index 4d85e3ea006b8..f9ed4aed6ca32 100644
--- a/llvm/test/CodeGen/RISCV/inline-asm-abi-names.ll
+++ b/llvm/test/CodeGen/RISCV/inline-asm-abi-names.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -no-integrated-as < %s \
 ; RUN:   | FileCheck -check-prefix=RV32I %s
-; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -no-integrated-as < %s \
 ; RUN:   | FileCheck -check-prefix=RV64I %s
 
 ; These test that we can use both the architectural names (x*) and the ABI names
diff --git a/llvm/test/CodeGen/RISCV/inline-asm.ll b/llvm/test/CodeGen/RISCV/inline-asm.ll
index 43f951e352a68..de5d9a5f22a83 100644
--- a/llvm/test/CodeGen/RISCV/inline-asm.ll
+++ b/llvm/test/CodeGen/RISCV/inline-asm.ll
@@ -1,7 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv32 -verify-machineinstrs -no-integrated-as < %s \
 ; RUN:   | FileCheck -check-prefix=RV32I %s
-; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
+; RUN: llc -mtriple=riscv64 -verify-machineinstrs -no-integrated-as < %s \
 ; RUN:   | FileCheck -check-prefix=RV64I %s
 
 @gi = external global i32
diff --git a/llvm/test/CodeGen/RISCV/large-stack.ll b/llvm/test/CodeGen/RISCV/large-stack.ll
index 7acf0f4076e85..7cc6e83d7d85c 100644
--- a/llvm/test/CodeGen/RISCV/large-stack.ll
+++ b/llvm/test/CodeGen/RISCV/large-stack.ll
@@ -64,10 +64,12 @@ define void @test_emergency_spill_slot(i32 %a) {
 ; RV32I-FPELIM-NEXT:    add a1, a2, a1
 ; RV32I-FPELIM-NEXT:    #APP
 ; RV32I-FPELIM-NEXT:    nop
+; RV32I-FPELIM-EMPTY:
 ; RV32I-FPELIM-NEXT:    #NO_APP
 ; RV32I-FPELIM-NEXT:    sw a0, 0(a1)
 ; RV32I-FPELIM-NEXT:    #APP
 ; RV32I-FPELIM-NEXT:    nop
+; RV32I-FPELIM-EMPTY:
 ; RV32I-FPELIM-NEXT:    #NO_APP
 ; RV32I-FPELIM-NEXT:    lui a0, 97
 ; RV32I-FPELIM-NEXT:    addi a0, a0, 672
@@ -103,10 +105,12 @@ define void @test_emergency_spill_slot(i32 %a) {
 ; RV32I-WITHFP-NEXT:    add a1, a2, a1
 ; RV32I-WITHFP-NEXT:    #APP
 ; RV32I-WITHFP-NEXT:    nop
+; RV32I-WITHFP-EMPTY:
 ; RV32I-WITHFP-NEXT:    #NO_APP
 ; RV32I-WITHFP-NEXT:    sw a0, 0(a1)
 ; RV32I-WITHFP-NEXT:    #APP
 ; RV32I-WITHFP-NEXT:    nop
+; RV32I-WITHFP-EMPTY:
 ; RV32I-WITHFP-NEXT:    #NO_APP
 ; RV32I-WITHFP-NEXT:    lui a0, 97
 ; RV32I-WITHFP-NEXT:    addi a0, a0, 688

From b6912c879ed848fd59c108e8b90fe0180893ee56 Mon Sep 17 00:00:00 2001
From: Qiu Chaofan 
Date: Mon, 13 Jul 2020 12:15:44 +0800
Subject: [PATCH 066/771] [PowerPC] Support constrained conversion in SPE
 target

This patch adds support for constrained int/fp conversions between
signed/unsigned i32 and f32/f64.

Reviewed By: jhibbits

Differential Revision: https://reviews.llvm.org/D82747
---
 llvm/lib/Target/PowerPC/PPCISelLowering.cpp |   8 +-
 llvm/lib/Target/PowerPC/PPCInstrSPE.td      |  16 +-
 llvm/test/CodeGen/PowerPC/fp-strict-conv.ll | 274 ++++++++++++++++++++
 3 files changed, 288 insertions(+), 10 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/fp-strict-conv.ll

diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 49140bab51343..575ad68fecd99 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -423,6 +423,9 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
 
   if (Subtarget.hasSPE()) {
     // SPE has built-in conversions
+    setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Legal);
+    setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Legal);
+    setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Legal);
     setOperationAction(ISD::FP_TO_SINT, MVT::i32, Legal);
     setOperationAction(ISD::SINT_TO_FP, MVT::i32, Legal);
     setOperationAction(ISD::UINT_TO_FP, MVT::i32, Legal);
@@ -572,9 +575,10 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
       setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
   } else {
     // PowerPC does not have FP_TO_UINT on 32-bit implementations.
-    if (Subtarget.hasSPE())
+    if (Subtarget.hasSPE()) {
+      setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Legal);
       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Legal);
-    else
+    } else
       setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
   }
 
diff --git a/llvm/lib/Target/PowerPC/PPCInstrSPE.td b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
index 935c3044ae470..858eb0c9fe500 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrSPE.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrSPE.td
@@ -158,7 +158,7 @@ def EFDCFSF        : EFXForm_2a<755, (outs sperc:$RT), (ins spe4rc:$RB),
 
 def EFDCFSI        : EFXForm_2a<753, (outs sperc:$RT), (ins gprc:$RB),
                                 "efdcfsi $RT, $RB", IIC_FPDGeneral,
-                                [(set f64:$RT, (sint_to_fp i32:$RB))]>;
+                                [(set f64:$RT, (any_sint_to_fp i32:$RB))]>;
 
 def EFDCFSID       : EFXForm_2a<739, (outs sperc:$RT), (ins gprc:$RB),
                                 "efdcfsid $RT, $RB", IIC_FPDGeneral,
@@ -169,7 +169,7 @@ def EFDCFUF        : EFXForm_2a<754, (outs sperc:$RT), (ins spe4rc:$RB),
 
 def EFDCFUI        : EFXForm_2a<752, (outs sperc:$RT), (ins gprc:$RB),
                                 "efdcfui $RT, $RB", IIC_FPDGeneral,
-                                [(set f64:$RT, (uint_to_fp i32:$RB))]>;
+                                [(set f64:$RT, (any_uint_to_fp i32:$RB))]>;
 
 def EFDCFUID       : EFXForm_2a<738, (outs sperc:$RT), (ins gprc:$RB),
                                 "efdcfuid $RT, $RB", IIC_FPDGeneral,
@@ -197,7 +197,7 @@ def EFDCTSIDZ      : EFXForm_2a<747, (outs gprc:$RT), (ins sperc:$RB),
 
 def EFDCTSIZ       : EFXForm_2a<762, (outs gprc:$RT), (ins sperc:$RB),
                                 "efdctsiz $RT, $RB", IIC_FPDGeneral,
-                                [(set i32:$RT, (fp_to_sint f64:$RB))]>;
+                                [(set i32:$RT, (any_fp_to_sint f64:$RB))]>;
 
 def EFDCTUF        : EFXForm_2a<758, (outs sperc:$RT), (ins spe4rc:$RB),
                                 "efdctuf $RT, $RB", IIC_FPDGeneral, []>;
@@ -212,7 +212,7 @@ def EFDCTUIDZ      : EFXForm_2a<746, (outs gprc:$RT), (ins sperc:$RB),
 
 def EFDCTUIZ       : EFXForm_2a<760, (outs gprc:$RT), (ins sperc:$RB),
                                 "efdctuiz $RT, $RB", IIC_FPDGeneral,
-                                [(set i32:$RT, (fp_to_uint f64:$RB))]>;
+                                [(set i32:$RT, (any_fp_to_uint f64:$RB))]>;
 
 def EFDDIV         : EFXForm_1<745, (outs sperc:$RT), (ins sperc:$RA, sperc:$RB),
                                "efddiv $RT, $RA, $RB", IIC_FPDivD,
@@ -261,14 +261,14 @@ def EFSCFSF        : EFXForm_2a<723, (outs spe4rc:$RT), (ins spe4rc:$RB),
 
 def EFSCFSI        : EFXForm_2a<721, (outs spe4rc:$RT), (ins gprc:$RB),
                                 "efscfsi $RT, $RB", IIC_FPSGeneral,
-                                [(set f32:$RT, (sint_to_fp i32:$RB))]>;
+                                [(set f32:$RT, (any_sint_to_fp i32:$RB))]>;
 
 def EFSCFUF        : EFXForm_2a<722, (outs spe4rc:$RT), (ins spe4rc:$RB),
                                 "efscfuf $RT, $RB", IIC_FPSGeneral, []>;
 
 def EFSCFUI        : EFXForm_2a<720, (outs spe4rc:$RT), (ins gprc:$RB),
                                 "efscfui $RT, $RB", IIC_FPSGeneral,
-                                [(set f32:$RT, (uint_to_fp i32:$RB))]>;
+                                [(set f32:$RT, (any_uint_to_fp i32:$RB))]>;
 
 let isCompare = 1 in {
 def EFSCMPEQ       : EFXForm_3<718, (outs crrc:$crD), (ins spe4rc:$RA, spe4rc:$RB),
@@ -288,7 +288,7 @@ def EFSCTSI        : EFXForm_2a<725, (outs gprc:$RT), (ins spe4rc:$RB),
 
 def EFSCTSIZ       : EFXForm_2a<730, (outs gprc:$RT), (ins spe4rc:$RB),
                                 "efsctsiz $RT, $RB", IIC_FPSGeneral,
-                                [(set i32:$RT, (fp_to_sint f32:$RB))]>;
+                                [(set i32:$RT, (any_fp_to_sint f32:$RB))]>;
 
 def EFSCTUF        : EFXForm_2a<726, (outs sperc:$RT), (ins spe4rc:$RB),
                                 "efsctuf $RT, $RB", IIC_FPSGeneral, []>;
@@ -299,7 +299,7 @@ def EFSCTUI        : EFXForm_2a<724, (outs gprc:$RT), (ins spe4rc:$RB),
 
 def EFSCTUIZ       : EFXForm_2a<728, (outs gprc:$RT), (ins spe4rc:$RB),
                                 "efsctuiz $RT, $RB", IIC_FPSGeneral,
-                                [(set i32:$RT, (fp_to_uint f32:$RB))]>;
+                                [(set i32:$RT, (any_fp_to_uint f32:$RB))]>;
 
 def EFSDIV         : EFXForm_1<713, (outs spe4rc:$RT), (ins spe4rc:$RA, spe4rc:$RB),
                                "efsdiv $RT, $RA, $RB", IIC_FPDivD,
diff --git a/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll
new file mode 100644
index 0000000000000..ab806a19c158e
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/fp-strict-conv.ll
@@ -0,0 +1,274 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names < %s -mcpu=e500 \
+; RUN:   -mtriple=powerpc-unknown-linux-gnu -mattr=spe | FileCheck %s \
+; RUN:   -check-prefix=SPE
+
+declare i32 @llvm.experimental.constrained.fptosi.i32.f64(double, metadata)
+declare i64 @llvm.experimental.constrained.fptosi.i64.f64(double, metadata)
+declare i64 @llvm.experimental.constrained.fptoui.i64.f64(double, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f64(double, metadata)
+
+declare i32 @llvm.experimental.constrained.fptosi.i32.f32(float, metadata)
+declare i64 @llvm.experimental.constrained.fptosi.i64.f32(float, metadata)
+declare i64 @llvm.experimental.constrained.fptoui.i64.f32(float, metadata)
+declare i32 @llvm.experimental.constrained.fptoui.i32.f32(float, metadata)
+
+declare double @llvm.experimental.constrained.sitofp.f64.i32(i32, metadata, metadata)
+declare double @llvm.experimental.constrained.sitofp.f64.i64(i64, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i32(i32, metadata, metadata)
+declare double @llvm.experimental.constrained.uitofp.f64.i64(i64, metadata, metadata)
+
+declare float @llvm.experimental.constrained.sitofp.f32.i64(i64, metadata, metadata)
+declare float @llvm.experimental.constrained.sitofp.f32.i32(i32, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i32(i32, metadata, metadata)
+declare float @llvm.experimental.constrained.uitofp.f32.i64(i64, metadata, metadata)
+
+define i32 @d_to_i32(double %m) #0 {
+; SPE-LABEL: d_to_i32:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo r3, r3, r4
+; SPE-NEXT:    efdctsiz r3, r3
+; SPE-NEXT:    blr
+entry:
+  %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f64(double %m, metadata !"fpexcept.strict") #0
+  ret i32 %conv
+}
+
+define i64 @d_to_i64(double %m) #0 {
+; SPE-LABEL: d_to_i64:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    mflr r0
+; SPE-NEXT:    stw r0, 4(r1)
+; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    .cfi_offset lr, 4
+; SPE-NEXT:    evmergelo r4, r3, r4
+; SPE-NEXT:    evmergehi r3, r4, r4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    bl __fixdfdi
+; SPE-NEXT:    lwz r0, 20(r1)
+; SPE-NEXT:    addi r1, r1, 16
+; SPE-NEXT:    mtlr r0
+; SPE-NEXT:    blr
+entry:
+  %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f64(double %m, metadata !"fpexcept.strict") #0
+  ret i64 %conv
+}
+
+define i64 @d_to_u64(double %m) #0 {
+; SPE-LABEL: d_to_u64:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    mflr r0
+; SPE-NEXT:    stw r0, 4(r1)
+; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    .cfi_offset lr, 4
+; SPE-NEXT:    evmergelo r4, r3, r4
+; SPE-NEXT:    evmergehi r3, r4, r4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    bl __fixunsdfdi
+; SPE-NEXT:    lwz r0, 20(r1)
+; SPE-NEXT:    addi r1, r1, 16
+; SPE-NEXT:    mtlr r0
+; SPE-NEXT:    blr
+entry:
+  %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f64(double %m, metadata !"fpexcept.strict") #0
+  ret i64 %conv
+}
+
+define zeroext i32 @d_to_u32(double %m) #0 {
+; SPE-LABEL: d_to_u32:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    evmergelo r3, r3, r4
+; SPE-NEXT:    efdctuiz r3, r3
+; SPE-NEXT:    blr
+entry:
+  %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f64(double %m, metadata !"fpexcept.strict") #0
+  ret i32 %conv
+}
+
+define signext i32 @f_to_i32(float %m) #0 {
+; SPE-LABEL: f_to_i32:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    efsctsiz r3, r3
+; SPE-NEXT:    blr
+entry:
+  %conv = call i32 @llvm.experimental.constrained.fptosi.i32.f32(float %m, metadata !"fpexcept.strict") #0
+  ret i32 %conv
+}
+
+define i64 @f_to_i64(float %m) #0 {
+; SPE-LABEL: f_to_i64:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    mflr r0
+; SPE-NEXT:    stw r0, 4(r1)
+; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    .cfi_offset lr, 4
+; SPE-NEXT:    bl __fixsfdi
+; SPE-NEXT:    lwz r0, 20(r1)
+; SPE-NEXT:    addi r1, r1, 16
+; SPE-NEXT:    mtlr r0
+; SPE-NEXT:    blr
+entry:
+  %conv = call i64 @llvm.experimental.constrained.fptosi.i64.f32(float %m, metadata !"fpexcept.strict") #0
+  ret i64 %conv
+}
+
+define i64 @f_to_u64(float %m) #0 {
+; SPE-LABEL: f_to_u64:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    mflr r0
+; SPE-NEXT:    stw r0, 4(r1)
+; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    .cfi_offset lr, 4
+; SPE-NEXT:    bl __fixunssfdi
+; SPE-NEXT:    lwz r0, 20(r1)
+; SPE-NEXT:    addi r1, r1, 16
+; SPE-NEXT:    mtlr r0
+; SPE-NEXT:    blr
+entry:
+  %conv = call i64 @llvm.experimental.constrained.fptoui.i64.f32(float %m, metadata !"fpexcept.strict") #0
+  ret i64 %conv
+}
+
+define zeroext i32 @f_to_u32(float %m) #0 {
+; SPE-LABEL: f_to_u32:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    efsctuiz r3, r3
+; SPE-NEXT:    blr
+entry:
+  %conv = call i32 @llvm.experimental.constrained.fptoui.i32.f32(float %m, metadata !"fpexcept.strict") #0
+  ret i32 %conv
+}
+
+define double @i32_to_d(i32 signext %m) #0 {
+; SPE-LABEL: i32_to_d:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    efdcfsi r4, r3
+; SPE-NEXT:    evmergehi r3, r4, r4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    blr
+entry:
+  %conv = tail call double @llvm.experimental.constrained.sitofp.f64.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret double %conv
+}
+
+define double @i64_to_d(i64 %m) #0 {
+; SPE-LABEL: i64_to_d:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    mflr r0
+; SPE-NEXT:    stw r0, 4(r1)
+; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    .cfi_offset lr, 4
+; SPE-NEXT:    bl __floatdidf
+; SPE-NEXT:    evmergelo r4, r3, r4
+; SPE-NEXT:    evmergehi r3, r4, r4
+; SPE-NEXT:    lwz r0, 20(r1)
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    addi r1, r1, 16
+; SPE-NEXT:    mtlr r0
+; SPE-NEXT:    blr
+entry:
+  %conv = tail call double @llvm.experimental.constrained.sitofp.f64.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret double %conv
+}
+
+define double @u32_to_d(i32 zeroext %m) #0 {
+; SPE-LABEL: u32_to_d:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    efdcfui r4, r3
+; SPE-NEXT:    evmergehi r3, r4, r4
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    blr
+entry:
+  %conv = tail call double @llvm.experimental.constrained.uitofp.f64.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret double %conv
+}
+
+define double @u64_to_d(i64 %m) #0 {
+; SPE-LABEL: u64_to_d:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    mflr r0
+; SPE-NEXT:    stw r0, 4(r1)
+; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    .cfi_offset lr, 4
+; SPE-NEXT:    bl __floatundidf
+; SPE-NEXT:    evmergelo r4, r3, r4
+; SPE-NEXT:    evmergehi r3, r4, r4
+; SPE-NEXT:    lwz r0, 20(r1)
+; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
+; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
+; SPE-NEXT:    addi r1, r1, 16
+; SPE-NEXT:    mtlr r0
+; SPE-NEXT:    blr
+entry:
+  %conv = tail call double @llvm.experimental.constrained.uitofp.f64.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret double %conv
+}
+
+define float @i32_to_f(i32 signext %m) #0 {
+; SPE-LABEL: i32_to_f:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    efscfsi r3, r3
+; SPE-NEXT:    blr
+entry:
+  %conv = tail call float @llvm.experimental.constrained.sitofp.f32.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %conv
+}
+
+define float @i64_to_f(i64 %m) #0 {
+; SPE-LABEL: i64_to_f:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    mflr r0
+; SPE-NEXT:    stw r0, 4(r1)
+; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    .cfi_offset lr, 4
+; SPE-NEXT:    bl __floatdisf
+; SPE-NEXT:    lwz r0, 20(r1)
+; SPE-NEXT:    addi r1, r1, 16
+; SPE-NEXT:    mtlr r0
+; SPE-NEXT:    blr
+entry:
+  %conv = tail call float @llvm.experimental.constrained.sitofp.f32.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %conv
+}
+
+define float @u32_to_f(i32 zeroext %m) #0 {
+; SPE-LABEL: u32_to_f:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    efscfui r3, r3
+; SPE-NEXT:    blr
+entry:
+  %conv = tail call float @llvm.experimental.constrained.uitofp.f32.i32(i32 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %conv
+}
+
+define float @u64_to_f(i64 %m) #0 {
+; SPE-LABEL: u64_to_f:
+; SPE:       # %bb.0: # %entry
+; SPE-NEXT:    mflr r0
+; SPE-NEXT:    stw r0, 4(r1)
+; SPE-NEXT:    stwu r1, -16(r1)
+; SPE-NEXT:    .cfi_def_cfa_offset 16
+; SPE-NEXT:    .cfi_offset lr, 4
+; SPE-NEXT:    bl __floatundisf
+; SPE-NEXT:    lwz r0, 20(r1)
+; SPE-NEXT:    addi r1, r1, 16
+; SPE-NEXT:    mtlr r0
+; SPE-NEXT:    blr
+entry:
+  %conv = tail call float @llvm.experimental.constrained.uitofp.f32.i64(i64 %m, metadata !"round.dynamic", metadata !"fpexcept.strict") #0
+  ret float %conv
+}
+
+attributes #0 = { strictfp }

From ac8dc526c4717907bed11b2fc7ab0db5a0f466ba Mon Sep 17 00:00:00 2001
From: Kai Luo 
Date: Mon, 13 Jul 2020 04:31:04 +0000
Subject: [PATCH 067/771] [PowerPC] Enhance tests for D83276. NFC.

---
 .../PowerPC/stack-clash-prologue-nounwind.ll  | 474 ++++++++++++++++++
 .../CodeGen/PowerPC/stack-clash-prologue.ll   |  51 +-
 2 files changed, 510 insertions(+), 15 deletions(-)
 create mode 100644 llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll

diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll
new file mode 100644
index 0000000000000..e595d8a732a5c
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue-nounwind.ll
@@ -0,0 +1,474 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mtriple=powerpc64le-linux-gnu < %s | FileCheck \
+; RUN:   -check-prefix=CHECK-LE %s
+; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mtriple=powerpc64-linux-gnu < %s | FileCheck \
+; RUN:   -check-prefix=CHECK-BE %s
+; RUN: llc -ppc-asm-full-reg-names -verify-machineinstrs \
+; RUN:   -mtriple=powerpc-linux-gnu < %s | FileCheck \
+; RUN:   -check-prefix=CHECK-32 %s
+
+; Free probe
+define i8 @f0() #0 nounwind {
+; CHECK-LE-LABEL: f0:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, -64(r1)
+; CHECK-LE-NEXT:    lbz r3, -64(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f0:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, -64(r1)
+; CHECK-BE-NEXT:    lbz r3, -64(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f0:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    stwu r1, -80(r1)
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    addi r1, r1, 80
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 64
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+define i8 @f1() #0 "stack-probe-size"="0" nounwind {
+; CHECK-LE-LABEL: f1:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mr r12, r1
+; CHECK-LE-NEXT:    li r0, 259
+; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:  .LBB1_1: # %entry
+; CHECK-LE-NEXT:    #
+; CHECK-LE-NEXT:    stdu r12, -16(r1)
+; CHECK-LE-NEXT:    bdnz .LBB1_1
+; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 48(r1)
+; CHECK-LE-NEXT:    lbz r3, 48(r1)
+; CHECK-LE-NEXT:    addi r1, r1, 4144
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f1:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mr r12, r1
+; CHECK-BE-NEXT:    li r0, 260
+; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:  .LBB1_1: # %entry
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    stdu r12, -16(r1)
+; CHECK-BE-NEXT:    bdnz .LBB1_1
+; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 64(r1)
+; CHECK-BE-NEXT:    lbz r3, 64(r1)
+; CHECK-BE-NEXT:    addi r1, r1, 4160
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f1:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    mr r12, r1
+; CHECK-32-NEXT:    li r0, 257
+; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:  .LBB1_1: # %entry
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    stwu r12, -16(r1)
+; CHECK-32-NEXT:    bdnz .LBB1_1
+; CHECK-32-NEXT:  # %bb.2: # %entry
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    addi r1, r1, 4112
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 4096
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+define i8 @f2() #0 nounwind {
+; CHECK-LE-LABEL: f2:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mr r12, r1
+; CHECK-LE-NEXT:    stdu r12, -48(r1)
+; CHECK-LE-NEXT:    li r0, 16
+; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:  .LBB2_1: # %entry
+; CHECK-LE-NEXT:    #
+; CHECK-LE-NEXT:    stdu r12, -4096(r1)
+; CHECK-LE-NEXT:    bdnz .LBB2_1
+; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 48(r1)
+; CHECK-LE-NEXT:    lbz r3, 48(r1)
+; CHECK-LE-NEXT:    ld r1, 0(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f2:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mr r12, r1
+; CHECK-BE-NEXT:    stdu r12, -64(r1)
+; CHECK-BE-NEXT:    li r0, 16
+; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:  .LBB2_1: # %entry
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    stdu r12, -4096(r1)
+; CHECK-BE-NEXT:    bdnz .LBB2_1
+; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 64(r1)
+; CHECK-BE-NEXT:    lbz r3, 64(r1)
+; CHECK-BE-NEXT:    ld r1, 0(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f2:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    mr r12, r1
+; CHECK-32-NEXT:    stwu r12, -16(r1)
+; CHECK-32-NEXT:    li r0, 16
+; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:  .LBB2_1: # %entry
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    stwu r12, -4096(r1)
+; CHECK-32-NEXT:    bdnz .LBB2_1
+; CHECK-32-NEXT:  # %bb.2: # %entry
+; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    mr r0, r31
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    lwz r31, 0(r1)
+; CHECK-32-NEXT:    mr r1, r31
+; CHECK-32-NEXT:    mr r31, r0
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 65536
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+define i8 @f3() #0 "stack-probe-size"="32768" nounwind {
+; CHECK-LE-LABEL: f3:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mr r12, r1
+; CHECK-LE-NEXT:    stdu r12, -48(r1)
+; CHECK-LE-NEXT:    stdu r12, -32768(r1)
+; CHECK-LE-NEXT:    stdu r12, -32768(r1)
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 48(r1)
+; CHECK-LE-NEXT:    lbz r3, 48(r1)
+; CHECK-LE-NEXT:    ld r1, 0(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f3:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mr r12, r1
+; CHECK-BE-NEXT:    stdu r12, -64(r1)
+; CHECK-BE-NEXT:    stdu r12, -32768(r1)
+; CHECK-BE-NEXT:    stdu r12, -32768(r1)
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 64(r1)
+; CHECK-BE-NEXT:    lbz r3, 64(r1)
+; CHECK-BE-NEXT:    ld r1, 0(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f3:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    mr r12, r1
+; CHECK-32-NEXT:    stwu r12, -16(r1)
+; CHECK-32-NEXT:    stwu r12, -32768(r1)
+; CHECK-32-NEXT:    stwu r12, -32768(r1)
+; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    mr r0, r31
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    lwz r31, 0(r1)
+; CHECK-32-NEXT:    mr r1, r31
+; CHECK-32-NEXT:    mr r31, r0
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 65536
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+; Same as f2, but without protection.
+define i8 @f4() nounwind {
+; CHECK-LE-LABEL: f4:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    lis r0, -2
+; CHECK-LE-NEXT:    ori r0, r0, 65488
+; CHECK-LE-NEXT:    stdux r1, r1, r0
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 48(r1)
+; CHECK-LE-NEXT:    lbz r3, 48(r1)
+; CHECK-LE-NEXT:    ld r1, 0(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f4:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lis r0, -2
+; CHECK-BE-NEXT:    ori r0, r0, 65472
+; CHECK-BE-NEXT:    stdux r1, r1, r0
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 64(r1)
+; CHECK-BE-NEXT:    lbz r3, 64(r1)
+; CHECK-BE-NEXT:    ld r1, 0(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f4:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    lis r0, -2
+; CHECK-32-NEXT:    ori r0, r0, 65520
+; CHECK-32-NEXT:    stwux r1, r1, r0
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    mr r0, r31
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    lwz r31, 0(r1)
+; CHECK-32-NEXT:    mr r1, r31
+; CHECK-32-NEXT:    mr r31, r0
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 65536
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
+; CHECK-LE-LABEL: f5:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mr r12, r1
+; CHECK-LE-NEXT:    stdu r12, -48(r1)
+; CHECK-LE-NEXT:    li r0, 16
+; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:    lis r0, -1
+; CHECK-LE-NEXT:    nop
+; CHECK-LE-NEXT:  .LBB5_1: # %entry
+; CHECK-LE-NEXT:    #
+; CHECK-LE-NEXT:    stdux r12, r1, r0
+; CHECK-LE-NEXT:    bdnz .LBB5_1
+; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 48(r1)
+; CHECK-LE-NEXT:    lbz r3, 48(r1)
+; CHECK-LE-NEXT:    ld r1, 0(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f5:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mr r12, r1
+; CHECK-BE-NEXT:    stdu r12, -64(r1)
+; CHECK-BE-NEXT:    li r0, 16
+; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:    lis r0, -1
+; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:  .LBB5_1: # %entry
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    stdux r12, r1, r0
+; CHECK-BE-NEXT:    bdnz .LBB5_1
+; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 64(r1)
+; CHECK-BE-NEXT:    lbz r3, 64(r1)
+; CHECK-BE-NEXT:    ld r1, 0(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f5:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    mr r12, r1
+; CHECK-32-NEXT:    stwu r12, -16(r1)
+; CHECK-32-NEXT:    li r0, 16
+; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:    lis r0, -1
+; CHECK-32-NEXT:    nop
+; CHECK-32-NEXT:  .LBB5_1: # %entry
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    stwux r12, r1, r0
+; CHECK-32-NEXT:    bdnz .LBB5_1
+; CHECK-32-NEXT:  # %bb.2: # %entry
+; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    mr r0, r31
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    lwz r31, 0(r1)
+; CHECK-32-NEXT:    mr r1, r31
+; CHECK-32-NEXT:    mr r31, r0
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 1048576
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+define i8 @f6() #0 nounwind {
+; CHECK-LE-LABEL: f6:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    mr r12, r1
+; CHECK-LE-NEXT:    stdu r12, -48(r1)
+; CHECK-LE-NEXT:    lis r0, 4
+; CHECK-LE-NEXT:    nop
+; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:  .LBB6_1: # %entry
+; CHECK-LE-NEXT:    #
+; CHECK-LE-NEXT:    stdu r12, -4096(r1)
+; CHECK-LE-NEXT:    bdnz .LBB6_1
+; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 48(r1)
+; CHECK-LE-NEXT:    lbz r3, 48(r1)
+; CHECK-LE-NEXT:    ld r1, 0(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f6:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    mr r12, r1
+; CHECK-BE-NEXT:    stdu r12, -64(r1)
+; CHECK-BE-NEXT:    lis r0, 4
+; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:  .LBB6_1: # %entry
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    stdu r12, -4096(r1)
+; CHECK-BE-NEXT:    bdnz .LBB6_1
+; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 64(r1)
+; CHECK-BE-NEXT:    lbz r3, 64(r1)
+; CHECK-BE-NEXT:    ld r1, 0(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f6:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    mr r12, r1
+; CHECK-32-NEXT:    stwu r12, -16(r1)
+; CHECK-32-NEXT:    lis r0, 4
+; CHECK-32-NEXT:    nop
+; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:  .LBB6_1: # %entry
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    stwu r12, -4096(r1)
+; CHECK-32-NEXT:    bdnz .LBB6_1
+; CHECK-32-NEXT:  # %bb.2: # %entry
+; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    stb r3, 16(r1)
+; CHECK-32-NEXT:    mr r0, r31
+; CHECK-32-NEXT:    lbz r3, 16(r1)
+; CHECK-32-NEXT:    lwz r31, 0(r1)
+; CHECK-32-NEXT:    mr r1, r31
+; CHECK-32-NEXT:    mr r31, r0
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 1073741824
+  %b = getelementptr inbounds i8, i8* %a, i64 63
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
+; CHECK-LE-LABEL: f7:
+; CHECK-LE:       # %bb.0: # %entry
+; CHECK-LE-NEXT:    lis r0, -1
+; CHECK-LE-NEXT:    mr r12, r1
+; CHECK-LE-NEXT:    ori r0, r0, 13776
+; CHECK-LE-NEXT:    stdux r12, r1, r0
+; CHECK-LE-NEXT:    li r0, 15258
+; CHECK-LE-NEXT:    mtctr r0
+; CHECK-LE-NEXT:    lis r0, -1
+; CHECK-LE-NEXT:    nop
+; CHECK-LE-NEXT:  .LBB7_1: # %entry
+; CHECK-LE-NEXT:    #
+; CHECK-LE-NEXT:    stdux r12, r1, r0
+; CHECK-LE-NEXT:    bdnz .LBB7_1
+; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    li r3, 3
+; CHECK-LE-NEXT:    stb r3, 41(r1)
+; CHECK-LE-NEXT:    lbz r3, 41(r1)
+; CHECK-LE-NEXT:    ld r1, 0(r1)
+; CHECK-LE-NEXT:    blr
+;
+; CHECK-BE-LABEL: f7:
+; CHECK-BE:       # %bb.0: # %entry
+; CHECK-BE-NEXT:    lis r0, -1
+; CHECK-BE-NEXT:    mr r12, r1
+; CHECK-BE-NEXT:    ori r0, r0, 13760
+; CHECK-BE-NEXT:    stdux r12, r1, r0
+; CHECK-BE-NEXT:    li r0, 15258
+; CHECK-BE-NEXT:    mtctr r0
+; CHECK-BE-NEXT:    lis r0, -1
+; CHECK-BE-NEXT:    nop
+; CHECK-BE-NEXT:  .LBB7_1: # %entry
+; CHECK-BE-NEXT:    #
+; CHECK-BE-NEXT:    stdux r12, r1, r0
+; CHECK-BE-NEXT:    bdnz .LBB7_1
+; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    li r3, 3
+; CHECK-BE-NEXT:    stb r3, 57(r1)
+; CHECK-BE-NEXT:    lbz r3, 57(r1)
+; CHECK-BE-NEXT:    ld r1, 0(r1)
+; CHECK-BE-NEXT:    blr
+;
+; CHECK-32-LABEL: f7:
+; CHECK-32:       # %bb.0: # %entry
+; CHECK-32-NEXT:    lis r0, -1
+; CHECK-32-NEXT:    mr r12, r1
+; CHECK-32-NEXT:    ori r0, r0, 13808
+; CHECK-32-NEXT:    stwux r12, r1, r0
+; CHECK-32-NEXT:    li r0, 15258
+; CHECK-32-NEXT:    mtctr r0
+; CHECK-32-NEXT:    lis r0, -1
+; CHECK-32-NEXT:    nop
+; CHECK-32-NEXT:  .LBB7_1: # %entry
+; CHECK-32-NEXT:    #
+; CHECK-32-NEXT:    stwux r12, r1, r0
+; CHECK-32-NEXT:    bdnz .LBB7_1
+; CHECK-32-NEXT:  # %bb.2: # %entry
+; CHECK-32-NEXT:    sub r0, r1, r12
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    stb r3, 9(r1)
+; CHECK-32-NEXT:    mr r0, r31
+; CHECK-32-NEXT:    lbz r3, 9(r1)
+; CHECK-32-NEXT:    lwz r31, 0(r1)
+; CHECK-32-NEXT:    mr r1, r31
+; CHECK-32-NEXT:    mr r31, r0
+; CHECK-32-NEXT:    blr
+entry:
+  %a = alloca i8, i64 1000000007
+  %b = getelementptr inbounds i8, i8* %a, i64 101
+  store volatile i8 3, i8* %a
+  %c = load volatile i8, i8* %a
+  ret i8 %c
+}
+
+attributes #0 = { "probe-stack"="inline-asm" }
diff --git a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
index e595d8a732a5c..eb8e05eef519f 100644
--- a/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
+++ b/llvm/test/CodeGen/PowerPC/stack-clash-prologue.ll
@@ -41,7 +41,7 @@ entry:
   ret i8 %c
 }
 
-define i8 @f1() #0 "stack-probe-size"="0" nounwind {
+define i8 @f1() #0 "stack-probe-size"="0" {
 ; CHECK-LE-LABEL: f1:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    mr r12, r1
@@ -52,6 +52,7 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind {
 ; CHECK-LE-NEXT:    stdu r12, -16(r1)
 ; CHECK-LE-NEXT:    bdnz .LBB1_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    .cfi_def_cfa_offset 4144
 ; CHECK-LE-NEXT:    li r3, 3
 ; CHECK-LE-NEXT:    stb r3, 48(r1)
 ; CHECK-LE-NEXT:    lbz r3, 48(r1)
@@ -68,6 +69,7 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind {
 ; CHECK-BE-NEXT:    stdu r12, -16(r1)
 ; CHECK-BE-NEXT:    bdnz .LBB1_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    .cfi_def_cfa_offset 4160
 ; CHECK-BE-NEXT:    li r3, 3
 ; CHECK-BE-NEXT:    stb r3, 64(r1)
 ; CHECK-BE-NEXT:    lbz r3, 64(r1)
@@ -84,10 +86,11 @@ define i8 @f1() #0 "stack-probe-size"="0" nounwind {
 ; CHECK-32-NEXT:    stwu r12, -16(r1)
 ; CHECK-32-NEXT:    bdnz .LBB1_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
-; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    sub r0, r1, r12
-; CHECK-32-NEXT:    stb r3, 16(r1)
 ; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    .cfi_def_cfa_offset 4112
+; CHECK-32-NEXT:    li r3, 3
+; CHECK-32-NEXT:    stb r3, 16(r1)
 ; CHECK-32-NEXT:    lbz r3, 16(r1)
 ; CHECK-32-NEXT:    addi r1, r1, 4112
 ; CHECK-32-NEXT:    blr
@@ -99,7 +102,7 @@ entry:
   ret i8 %c
 }
 
-define i8 @f2() #0 nounwind {
+define i8 @f2() #0 {
 ; CHECK-LE-LABEL: f2:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    mr r12, r1
@@ -111,6 +114,7 @@ define i8 @f2() #0 nounwind {
 ; CHECK-LE-NEXT:    stdu r12, -4096(r1)
 ; CHECK-LE-NEXT:    bdnz .LBB2_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    .cfi_def_cfa_offset 65584
 ; CHECK-LE-NEXT:    li r3, 3
 ; CHECK-LE-NEXT:    stb r3, 48(r1)
 ; CHECK-LE-NEXT:    lbz r3, 48(r1)
@@ -128,6 +132,7 @@ define i8 @f2() #0 nounwind {
 ; CHECK-BE-NEXT:    stdu r12, -4096(r1)
 ; CHECK-BE-NEXT:    bdnz .LBB2_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    .cfi_def_cfa_offset 65600
 ; CHECK-BE-NEXT:    li r3, 3
 ; CHECK-BE-NEXT:    stb r3, 64(r1)
 ; CHECK-BE-NEXT:    lbz r3, 64(r1)
@@ -146,8 +151,9 @@ define i8 @f2() #0 nounwind {
 ; CHECK-32-NEXT:    bdnz .LBB2_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
 ; CHECK-32-NEXT:    sub r0, r1, r12
-; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    .cfi_def_cfa_offset 65552
+; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    stb r3, 16(r1)
 ; CHECK-32-NEXT:    mr r0, r31
 ; CHECK-32-NEXT:    lbz r3, 16(r1)
@@ -163,13 +169,14 @@ entry:
   ret i8 %c
 }
 
-define i8 @f3() #0 "stack-probe-size"="32768" nounwind {
+define i8 @f3() #0 "stack-probe-size"="32768" {
 ; CHECK-LE-LABEL: f3:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    mr r12, r1
 ; CHECK-LE-NEXT:    stdu r12, -48(r1)
 ; CHECK-LE-NEXT:    stdu r12, -32768(r1)
 ; CHECK-LE-NEXT:    stdu r12, -32768(r1)
+; CHECK-LE-NEXT:    .cfi_def_cfa_offset 65584
 ; CHECK-LE-NEXT:    li r3, 3
 ; CHECK-LE-NEXT:    stb r3, 48(r1)
 ; CHECK-LE-NEXT:    lbz r3, 48(r1)
@@ -182,6 +189,7 @@ define i8 @f3() #0 "stack-probe-size"="32768" nounwind {
 ; CHECK-BE-NEXT:    stdu r12, -64(r1)
 ; CHECK-BE-NEXT:    stdu r12, -32768(r1)
 ; CHECK-BE-NEXT:    stdu r12, -32768(r1)
+; CHECK-BE-NEXT:    .cfi_def_cfa_offset 65600
 ; CHECK-BE-NEXT:    li r3, 3
 ; CHECK-BE-NEXT:    stb r3, 64(r1)
 ; CHECK-BE-NEXT:    lbz r3, 64(r1)
@@ -195,8 +203,9 @@ define i8 @f3() #0 "stack-probe-size"="32768" nounwind {
 ; CHECK-32-NEXT:    stwu r12, -32768(r1)
 ; CHECK-32-NEXT:    stwu r12, -32768(r1)
 ; CHECK-32-NEXT:    sub r0, r1, r12
-; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    .cfi_def_cfa_offset 65552
+; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    stb r3, 16(r1)
 ; CHECK-32-NEXT:    mr r0, r31
 ; CHECK-32-NEXT:    lbz r3, 16(r1)
@@ -213,12 +222,13 @@ entry:
 }
 
 ; Same as f2, but without protection.
-define i8 @f4() nounwind {
+define i8 @f4() {
 ; CHECK-LE-LABEL: f4:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    lis r0, -2
 ; CHECK-LE-NEXT:    ori r0, r0, 65488
 ; CHECK-LE-NEXT:    stdux r1, r1, r0
+; CHECK-LE-NEXT:    .cfi_def_cfa_offset 65584
 ; CHECK-LE-NEXT:    li r3, 3
 ; CHECK-LE-NEXT:    stb r3, 48(r1)
 ; CHECK-LE-NEXT:    lbz r3, 48(r1)
@@ -230,6 +240,7 @@ define i8 @f4() nounwind {
 ; CHECK-BE-NEXT:    lis r0, -2
 ; CHECK-BE-NEXT:    ori r0, r0, 65472
 ; CHECK-BE-NEXT:    stdux r1, r1, r0
+; CHECK-BE-NEXT:    .cfi_def_cfa_offset 65600
 ; CHECK-BE-NEXT:    li r3, 3
 ; CHECK-BE-NEXT:    stb r3, 64(r1)
 ; CHECK-BE-NEXT:    lbz r3, 64(r1)
@@ -241,8 +252,9 @@ define i8 @f4() nounwind {
 ; CHECK-32-NEXT:    lis r0, -2
 ; CHECK-32-NEXT:    ori r0, r0, 65520
 ; CHECK-32-NEXT:    stwux r1, r1, r0
-; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    .cfi_def_cfa_offset 65552
+; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    stb r3, 16(r1)
 ; CHECK-32-NEXT:    mr r0, r31
 ; CHECK-32-NEXT:    lbz r3, 16(r1)
@@ -258,7 +270,7 @@ entry:
   ret i8 %c
 }
 
-define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
+define i8 @f5() #0 "stack-probe-size"="65536" {
 ; CHECK-LE-LABEL: f5:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    mr r12, r1
@@ -272,6 +284,7 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
 ; CHECK-LE-NEXT:    stdux r12, r1, r0
 ; CHECK-LE-NEXT:    bdnz .LBB5_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    .cfi_def_cfa_offset 1048624
 ; CHECK-LE-NEXT:    li r3, 3
 ; CHECK-LE-NEXT:    stb r3, 48(r1)
 ; CHECK-LE-NEXT:    lbz r3, 48(r1)
@@ -291,6 +304,7 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
 ; CHECK-BE-NEXT:    stdux r12, r1, r0
 ; CHECK-BE-NEXT:    bdnz .LBB5_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    .cfi_def_cfa_offset 1048640
 ; CHECK-BE-NEXT:    li r3, 3
 ; CHECK-BE-NEXT:    stb r3, 64(r1)
 ; CHECK-BE-NEXT:    lbz r3, 64(r1)
@@ -311,8 +325,9 @@ define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
 ; CHECK-32-NEXT:    bdnz .LBB5_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
 ; CHECK-32-NEXT:    sub r0, r1, r12
-; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    .cfi_def_cfa_offset 1048592
+; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    stb r3, 16(r1)
 ; CHECK-32-NEXT:    mr r0, r31
 ; CHECK-32-NEXT:    lbz r3, 16(r1)
@@ -328,7 +343,7 @@ entry:
   ret i8 %c
 }
 
-define i8 @f6() #0 nounwind {
+define i8 @f6() #0 {
 ; CHECK-LE-LABEL: f6:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    mr r12, r1
@@ -341,6 +356,7 @@ define i8 @f6() #0 nounwind {
 ; CHECK-LE-NEXT:    stdu r12, -4096(r1)
 ; CHECK-LE-NEXT:    bdnz .LBB6_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    .cfi_def_cfa_offset 1073741872
 ; CHECK-LE-NEXT:    li r3, 3
 ; CHECK-LE-NEXT:    stb r3, 48(r1)
 ; CHECK-LE-NEXT:    lbz r3, 48(r1)
@@ -359,6 +375,7 @@ define i8 @f6() #0 nounwind {
 ; CHECK-BE-NEXT:    stdu r12, -4096(r1)
 ; CHECK-BE-NEXT:    bdnz .LBB6_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    .cfi_def_cfa_offset 1073741888
 ; CHECK-BE-NEXT:    li r3, 3
 ; CHECK-BE-NEXT:    stb r3, 64(r1)
 ; CHECK-BE-NEXT:    lbz r3, 64(r1)
@@ -378,8 +395,9 @@ define i8 @f6() #0 nounwind {
 ; CHECK-32-NEXT:    bdnz .LBB6_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
 ; CHECK-32-NEXT:    sub r0, r1, r12
-; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    .cfi_def_cfa_offset 1073741840
+; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    stb r3, 16(r1)
 ; CHECK-32-NEXT:    mr r0, r31
 ; CHECK-32-NEXT:    lbz r3, 16(r1)
@@ -395,7 +413,7 @@ entry:
   ret i8 %c
 }
 
-define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
+define i8 @f7() #0 "stack-probe-size"="65536" {
 ; CHECK-LE-LABEL: f7:
 ; CHECK-LE:       # %bb.0: # %entry
 ; CHECK-LE-NEXT:    lis r0, -1
@@ -411,6 +429,7 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
 ; CHECK-LE-NEXT:    stdux r12, r1, r0
 ; CHECK-LE-NEXT:    bdnz .LBB7_1
 ; CHECK-LE-NEXT:  # %bb.2: # %entry
+; CHECK-LE-NEXT:    .cfi_def_cfa_offset 1000000048
 ; CHECK-LE-NEXT:    li r3, 3
 ; CHECK-LE-NEXT:    stb r3, 41(r1)
 ; CHECK-LE-NEXT:    lbz r3, 41(r1)
@@ -432,6 +451,7 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
 ; CHECK-BE-NEXT:    stdux r12, r1, r0
 ; CHECK-BE-NEXT:    bdnz .LBB7_1
 ; CHECK-BE-NEXT:  # %bb.2: # %entry
+; CHECK-BE-NEXT:    .cfi_def_cfa_offset 1000000064
 ; CHECK-BE-NEXT:    li r3, 3
 ; CHECK-BE-NEXT:    stb r3, 57(r1)
 ; CHECK-BE-NEXT:    lbz r3, 57(r1)
@@ -454,8 +474,9 @@ define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
 ; CHECK-32-NEXT:    bdnz .LBB7_1
 ; CHECK-32-NEXT:  # %bb.2: # %entry
 ; CHECK-32-NEXT:    sub r0, r1, r12
-; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    sub r0, r1, r0
+; CHECK-32-NEXT:    .cfi_def_cfa_offset 1000000016
+; CHECK-32-NEXT:    li r3, 3
 ; CHECK-32-NEXT:    stb r3, 9(r1)
 ; CHECK-32-NEXT:    mr r0, r31
 ; CHECK-32-NEXT:    lbz r3, 9(r1)

From e808cab824488af137b62902e65dec3827b83b46 Mon Sep 17 00:00:00 2001
From: Max Kazantsev 
Date: Mon, 13 Jul 2020 11:14:59 +0700
Subject: [PATCH 068/771] [InstCombine] Improve select -> phi canonicalization:
 consider more blocks

We can try to replace select with a Phi not in its parent block alone,
but also in blocks of its arguments. We benefit from it when select's
argument is a Phi.

Differential Revision: https://reviews.llvm.org/D83284
Reviewed By: nikic
---
 .../InstCombine/InstCombineSelect.cpp         |  21 ++-
 llvm/test/Transforms/InstCombine/select.ll    | 169 +++++++++++++++++-
 2 files changed, 184 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 233fb3878ba72..17124f717af79 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -2443,11 +2443,11 @@ Instruction *InstCombiner::foldVectorSelect(SelectInst &Sel) {
   return nullptr;
 }
 
-static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT,
-                                    InstCombiner::BuilderTy &Builder) {
+static Instruction *foldSelectToPhiImpl(SelectInst &Sel, BasicBlock *BB,
+                                        const DominatorTree &DT,
+                                        InstCombiner::BuilderTy &Builder) {
   // Find the block's immediate dominator that ends with a conditional branch
   // that matches select's condition (maybe inverted).
-  BasicBlock *BB = Sel.getParent();
   auto *IDomNode = DT[BB]->getIDom();
   if (!IDomNode)
     return nullptr;
@@ -2500,6 +2500,21 @@ static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT,
   return PN;
 }
 
+static Instruction *foldSelectToPhi(SelectInst &Sel, const DominatorTree &DT,
+                                    InstCombiner::BuilderTy &Builder) {
+  // Try to replace this select with Phi in one of these blocks.
+  SmallSetVector CandidateBlocks;
+  CandidateBlocks.insert(Sel.getParent());
+  for (Value *V : Sel.operands())
+    if (auto *I = dyn_cast(V))
+      CandidateBlocks.insert(I->getParent());
+
+  for (BasicBlock *BB : CandidateBlocks)
+    if (auto *PN = foldSelectToPhiImpl(Sel, BB, DT, Builder))
+      return PN;
+  return nullptr;
+}
+
 Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
   Value *CondVal = SI.getCondition();
   Value *TrueVal = SI.getTrueValue();
diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll
index f990a58f984ce..08e547a6ea0ad 100644
--- a/llvm/test/Transforms/InstCombine/select.ll
+++ b/llvm/test/Transforms/InstCombine/select.ll
@@ -2250,11 +2250,40 @@ define i32 @test_select_into_phi_not_idom(i1 %cond, i32 %A, i32 %B)  {
 ; CHECK:       if.false:
 ; CHECK-NEXT:    br label [[MERGE]]
 ; CHECK:       merge:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i32 [ [[A:%.*]], [[IF_TRUE]] ], [ [[B:%.*]], [[IF_FALSE]] ]
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       exit:
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], i32 [[PHI]], i32 [[A]]
-; CHECK-NEXT:    ret i32 [[SEL]]
+; CHECK-NEXT:    ret i32 [[A:%.*]]
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.true:
+  br label %merge
+
+if.false:
+  br label %merge
+
+merge:
+  %phi = phi i32 [%A, %if.true], [%B, %if.false]
+  br label %exit
+
+exit:
+  %sel = select i1 %cond, i32 %phi, i32 %A
+  ret i32 %sel
+}
+
+define i32 @test_select_into_phi_not_idom_2(i1 %cond, i32 %A, i32 %B)  {
+; CHECK-LABEL: @test_select_into_phi_not_idom_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       if.true:
+; CHECK-NEXT:    br label [[MERGE:%.*]]
+; CHECK:       if.false:
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[B:%.*]]
 ;
 entry:
   br i1 %cond, label %if.true, label %if.false
@@ -2269,11 +2298,145 @@ merge:
   %phi = phi i32 [%A, %if.true], [%B, %if.false]
   br label %exit
 
+exit:
+  %sel = select i1 %cond, i32 %B, i32 %phi
+  ret i32 %sel
+}
+
+define i32 @test_select_into_phi_not_idom_inverted(i1 %cond, i32 %A, i32 %B)  {
+; CHECK-LABEL: @test_select_into_phi_not_idom_inverted(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_FALSE:%.*]], label [[IF_TRUE:%.*]]
+; CHECK:       if.true:
+; CHECK-NEXT:    br label [[MERGE:%.*]]
+; CHECK:       if.false:
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[B:%.*]], [[IF_FALSE]] ], [ [[A:%.*]], [[IF_TRUE]] ]
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+entry:
+  %inverted = xor i1 %cond, 1
+  br i1 %inverted, label %if.true, label %if.false
+
+if.true:
+  br label %merge
+
+if.false:
+  br label %merge
+
+merge:
+  %phi = phi i32 [%A, %if.true], [%B, %if.false]
+  br label %exit
+
+exit:
+  %sel = select i1 %cond, i32 %phi, i32 %A
+  ret i32 %sel
+}
+
+define i32 @test_select_into_phi_not_idom_inverted_2(i1 %cond, i32 %A, i32 %B)  {
+; CHECK-LABEL: @test_select_into_phi_not_idom_inverted_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_FALSE:%.*]], label [[IF_TRUE:%.*]]
+; CHECK:       if.true:
+; CHECK-NEXT:    br label [[MERGE:%.*]]
+; CHECK:       if.false:
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[B:%.*]], [[IF_FALSE]] ], [ [[A:%.*]], [[IF_TRUE]] ]
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+entry:
+  %inverted = xor i1 %cond, 1
+  br i1 %inverted, label %if.true, label %if.false
+
+if.true:
+  br label %merge
+
+if.false:
+  br label %merge
+
+merge:
+  %phi = phi i32 [%A, %if.true], [%B, %if.false]
+  br label %exit
+
+exit:
+  %sel = select i1 %cond, i32 %B, i32 %phi
+  ret i32 %sel
+}
+
+define i32 @test_select_into_phi_not_idom_no_dom_input_1(i1 %cond, i32 %A, i32 %B, i32 *%p)  {
+; CHECK-LABEL: @test_select_into_phi_not_idom_no_dom_input_1(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       if.true:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[MERGE:%.*]]
+; CHECK:       if.false:
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[A:%.*]], [[IF_FALSE]] ], [ [[C]], [[IF_TRUE]] ]
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.true:
+  %C = load i32, i32* %p
+  br label %merge
+
+if.false:
+  br label %merge
+
+merge:
+  %phi = phi i32 [%C, %if.true], [%B, %if.false]
+  br label %exit
+
 exit:
   %sel = select i1 %cond, i32 %phi, i32 %A
   ret i32 %sel
 }
 
+define i32 @test_select_into_phi_not_idom_no_dom_input_2(i1 %cond, i32 %A, i32 %B, i32 *%p)  {
+; CHECK-LABEL: @test_select_into_phi_not_idom_no_dom_input_2(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br i1 [[COND:%.*]], label [[IF_TRUE:%.*]], label [[IF_FALSE:%.*]]
+; CHECK:       if.true:
+; CHECK-NEXT:    br label [[MERGE:%.*]]
+; CHECK:       if.false:
+; CHECK-NEXT:    [[C:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    br label [[MERGE]]
+; CHECK:       merge:
+; CHECK-NEXT:    [[SEL:%.*]] = phi i32 [ [[C]], [[IF_FALSE]] ], [ [[B:%.*]], [[IF_TRUE]] ]
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 [[SEL]]
+;
+entry:
+  br i1 %cond, label %if.true, label %if.false
+
+if.true:
+  br label %merge
+
+if.false:
+  %C = load i32, i32* %p
+  br label %merge
+
+merge:
+  %phi = phi i32 [%A, %if.true], [%C, %if.false]
+  br label %exit
+
+exit:
+  %sel = select i1 %cond, i32 %B, i32 %phi
+  ret i32 %sel
+}
+
 ; Negative tests to ensure we don't remove selects with undef true/false values.
 ; See https://bugs.llvm.org/show_bug.cgi?id=31633
 ; https://lists.llvm.org/pipermail/llvm-dev/2016-October/106182.html

From 07c4c7e7959b7fd09830bbdf4dcd533e98aa45ab Mon Sep 17 00:00:00 2001
From: Aleksandr Platonov 
Date: Mon, 13 Jul 2020 09:04:29 +0200
Subject: [PATCH 069/771] [clangd] Fix tests build for GCC5
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Summary:
Build log:
```
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp: In member function ‘virtual void clang::clangd::{anonymous}::PreamblePatchTest_Define_Test::TestBody()’:
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:267:3: error: could not convert ‘(const char*)"\012        #define BAR\012        [[BAR]]"’ from ‘const char*’ to ‘llvm::StringLitera ’
   };
   ^
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:267:3: error: could not convert ‘(const char*)"#line 0 \".*main.cpp\"\012#line 2\012#define         BAR\012"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:267:3: error: could not convert ‘(const char*)"\012        #define BAR \\\012\012        [[BAR]]"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:267:3: error: could not convert ‘(const char*)"#line 0 \".*main.cpp\"\012#line 2\012#define         BAR\012"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:267:3: error: could not convert ‘(const char*)"\012        #define \\\012                BAR\012        [[BAR]]"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:267:3: error: could not convert ‘(const char*)"#line 0 \".*main.cpp\"\012#line 3\012#define         BAR\012"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp: In member function ‘virtual void clang::clangd::{anonymous}::PreamblePatchTest_LocateMacroAtWorks_Test::TestBody()’:
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:357:3: error: could not convert ‘(const char*)""’ from ‘const char*’ to ‘llvm::StringLiteral’
   };
   ^
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:357:3: error: could not convert ‘(const char*)"\012            #define $def^FOO\012            $use^FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:357:3: error: could not convert ‘(const char*)""’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:357:3: error: could not convert ‘(const char*)"\012            #define $def^FOO\012            #undef $use^FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:357:3: error: could not convert ‘(const char*)""’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:357:3: error: could not convert ‘(const char*)"\012            #define $def^FOO\012            #undef FOO\012            $use^FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:357:3: error: could not convert ‘(const char*)""’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:357:3: error: could not convert ‘(const char*)"\012            #define \\\012              $def^FOO\012            $use^FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:357:3: error: could not convert ‘(const char*)""’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:357:3: error: could not convert ‘(const char*)"\012            #\\\012              define /* FOO */\\\012              /* FOO */ $def^FOO\012            $use^FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:357:3: error: could not convert ‘(const char*)"#define FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:357:3: error: could not convert ‘(const char*)"\012            #define BAR\012            #define $def^FOO\012            $use^FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp: In member function ‘virtual void clang::clangd::{anonymous}::PreamblePatchTest_RefsToMacros_Test::TestBody()’:
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:445:3: error: could not convert ‘(const char*)""’ from ‘const char*’ to ‘llvm::StringLiteral’
   };
   ^
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:445:3: error: could not convert ‘(const char*)"\012            #define ^FOO\012            ^[[FOO]]"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:445:3: error: could not convert ‘(const char*)"#define FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:445:3: error: could not convert ‘(const char*)"\012            #define BAR\012            #define ^FOO\012            ^[[FOO]]"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:445:3: error: could not convert ‘(const char*)""’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:445:3: error: could not convert ‘(const char*)"\012            #define ^FOO\012            #undef ^FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp: In member function ‘virtual void clang::clangd::{anonymous}::PreamblePatch_ModifiedBounds_Test::TestBody()’:
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:512:3: error: could not convert ‘(const char*)""’ from ‘const char*’ to ‘llvm::StringLiteral’
   };
   ^
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:512:3: error: could not convert ‘(const char*)"\012            #define FOO\012            FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:512:3: error: could not convert ‘(const char*)"#define FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:512:3: error: could not convert ‘(const char*)"#define BAR"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:512:3: error: could not convert ‘(const char*)"\012            #define FOO\012            #undef FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
llvm-project/clang-tools-extra/clangd/unittests/PreambleTests.cpp:512:3: error: could not convert ‘(const char*)"#define FOO"’ from ‘const char*’ to ‘llvm::StringLiteral’
```

Patch by @ArcsinX !

Reviewers: kadircet, sammccall

Reviewed By: kadircet

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D83548
---
 .../clangd/unittests/PreambleTests.cpp         | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/clang-tools-extra/clangd/unittests/PreambleTests.cpp b/clang-tools-extra/clangd/unittests/PreambleTests.cpp
index 8c9669a945dd7..8482a1cc8237c 100644
--- a/clang-tools-extra/clangd/unittests/PreambleTests.cpp
+++ b/clang-tools-extra/clangd/unittests/PreambleTests.cpp
@@ -230,8 +230,8 @@ std::string getPreamblePatch(llvm::StringRef Baseline,
 TEST(PreamblePatchTest, Define) {
   // BAR should be defined while parsing the AST.
   struct {
-    llvm::StringLiteral Contents;
-    llvm::StringLiteral ExpectedPatch;
+    const char *const Contents;
+    const char *const ExpectedPatch;
   } Cases[] = {
       {
           R"cpp(
@@ -270,7 +270,7 @@ TEST(PreamblePatchTest, Define) {
     SCOPED_TRACE(Case.Contents);
     Annotations Modified(Case.Contents);
     EXPECT_THAT(getPreamblePatch("", Modified.code()),
-                MatchesRegex(Case.ExpectedPatch.str()));
+                MatchesRegex(Case.ExpectedPatch));
 
     auto AST = createPatchedAST("", Modified.code());
     ASSERT_TRUE(AST);
@@ -304,8 +304,8 @@ TEST(PreamblePatchTest, OrderingPreserved) {
 
 TEST(PreamblePatchTest, LocateMacroAtWorks) {
   struct {
-    llvm::StringLiteral Baseline;
-    llvm::StringLiteral Modified;
+    const char *const Baseline;
+    const char *const Modified;
   } Cases[] = {
       // Addition of new directive
       {
@@ -417,8 +417,8 @@ TEST(PreamblePatchTest, LocateMacroAtDeletion) {
 
 TEST(PreamblePatchTest, RefsToMacros) {
   struct {
-    llvm::StringLiteral Baseline;
-    llvm::StringLiteral Modified;
+    const char *const Baseline;
+    const char *const Modified;
   } Cases[] = {
       // Newly added
       {
@@ -491,8 +491,8 @@ TEST(TranslatePreamblePatchLocation, Simple) {
 
 TEST(PreamblePatch, ModifiedBounds) {
   struct {
-    llvm::StringLiteral Baseline;
-    llvm::StringLiteral Modified;
+    const char *const Baseline;
+    const char *const Modified;
   } Cases[] = {
       // Size increased
       {

From 9df6afbb5c99c103ef27614a8987934ef82f110f Mon Sep 17 00:00:00 2001
From: Frederik Gossen 
Date: Mon, 13 Jul 2020 08:28:13 +0000
Subject: [PATCH 070/771] [MLIR][Shape] Lower `shape.any`

Lower `shape.any` to its first operand.

Differential Revision: https://reviews.llvm.org/D83123
---
 .../ShapeToStandard/ShapeToStandard.cpp       | 17 ++++++++++++++
 .../ShapeToStandardPatterns.td                |  1 -
 .../ShapeToStandard/shape-to-standard.mlir    | 23 +++++++++++++++++++
 3 files changed, 40 insertions(+), 1 deletion(-)

diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp
index 7ebcb397349d4..f82019989e705 100644
--- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp
+++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandard.cpp
@@ -23,6 +23,22 @@ namespace {
 #include "ShapeToStandardPatterns.inc"
 
 /// Conversion patterns.
+class AnyOpConversion : public OpConversionPattern {
+public:
+  using OpConversionPattern::OpConversionPattern;
+
+  LogicalResult
+  matchAndRewrite(AnyOp op, ArrayRef operands,
+                  ConversionPatternRewriter &rewriter) const override {
+    AnyOp::Adaptor transformed(operands);
+
+    // Replace `any` with its first operand.
+    // Any operand would be a valid substitution.
+    rewriter.replaceOp(op, {transformed.inputs().front()});
+    return success();
+  }
+};
+
 template 
 class BinaryOpConversion : public OpConversionPattern {
 public:
@@ -181,6 +197,7 @@ void mlir::populateShapeToStandardConversionPatterns(
   populateWithGenerated(ctx, &patterns);
   // clang-format off
   patterns.insert<
+      AnyOpConversion,
       BinaryOpConversion,
       BinaryOpConversion,
       ConstSizeOpConverter,
diff --git a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandardPatterns.td b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandardPatterns.td
index a1335487f5ab3..30bed6d7fb65e 100644
--- a/mlir/lib/Conversion/ShapeToStandard/ShapeToStandardPatterns.td
+++ b/mlir/lib/Conversion/ShapeToStandard/ShapeToStandardPatterns.td
@@ -18,4 +18,3 @@ def IndexToSizeOpConversion : Pat<
 def SizeToIndexOpConversion : Pat<
     (Shape_SizeToIndexOp $arg),
     (replaceWithValue $arg)>;
-
diff --git a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
index 28ef190d09eba..22206637adbf4 100644
--- a/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
+++ b/mlir/test/Conversion/ShapeToStandard/shape-to-standard.mlir
@@ -158,3 +158,26 @@ func @get_extent_from_extent_tensor(%extents : tensor,
   return %result : !shape.size
 }
 
+// -----
+
+// Lower `any` to its first operand.
+// CHECK-LABEL: @any_of_three
+// CHECK-SAME:  (%[[A:.*]]: tensor, %[[B:.*]]: tensor, %[[C:.*]]: tensor) -> tensor
+func @any_of_three(%a : !shape.shape, %b : !shape.shape, %c : !shape.shape)
+    -> !shape.shape {
+  // CHECK: return %[[A]] : tensor
+  %result = shape.any %a, %b, %c
+  return %result : !shape.shape
+}
+
+// -----
+
+// Lower `any` to its first operand.
+// CHECK-LABEL: @any_of_one
+// CHECK-SAME:  (%[[A:.*]]: tensor) -> tensor
+func @any_of_one(%a : !shape.shape) -> !shape.shape {
+  // CHECK: return %[[A]] : tensor
+  %result = shape.any %a
+  return %result : !shape.shape
+}
+

From fd85b40aee4aa4cfcecaf2bfaf86e1de4c78ab0c Mon Sep 17 00:00:00 2001
From: Petar Avramovic 
Date: Mon, 13 Jul 2020 10:52:33 +0200
Subject: [PATCH 071/771] [GlobalISel][InlineAsm] Fix buildCopy for inputs

Check that input size matches size of destination reg class.
Attempt to extend input size when needed.

Differential Revision: https://reviews.llvm.org/D83384
---
 .../CodeGen/GlobalISel/InlineAsmLowering.cpp  | 39 ++++++++++++++++++-
 .../GlobalISel/irtranslator-inline-asm.ll     | 32 +++++++++++++++
 2 files changed, 69 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
index 241d5bace248b..2ce1d414e7550 100644
--- a/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/InlineAsmLowering.cpp
@@ -237,6 +237,39 @@ static unsigned getNumOpRegs(const MachineInstr &I, unsigned OpIdx) {
   return InlineAsm::getNumOperandRegisters(Flag);
 }
 
+static bool buildAnyextOrCopy(Register Dst, Register Src,
+                              MachineIRBuilder &MIRBuilder) {
+  const TargetRegisterInfo *TRI =
+      MIRBuilder.getMF().getSubtarget().getRegisterInfo();
+  MachineRegisterInfo *MRI = MIRBuilder.getMRI();
+
+  auto SrcTy = MRI->getType(Src);
+  if (!SrcTy.isValid()) {
+    LLVM_DEBUG(dbgs() << "Source type for copy is not valid\n");
+    return false;
+  }
+  unsigned SrcSize = TRI->getRegSizeInBits(Src, *MRI);
+  unsigned DstSize = TRI->getRegSizeInBits(Dst, *MRI);
+
+  if (DstSize < SrcSize) {
+    LLVM_DEBUG(dbgs() << "Input can't fit in destination reg class\n");
+    return false;
+  }
+
+  // Attempt to anyext small scalar sources.
+  if (DstSize > SrcSize) {
+    if (!SrcTy.isScalar()) {
+      LLVM_DEBUG(dbgs() << "Can't extend non-scalar input to size of"
+                           "destination register class\n");
+      return false;
+    }
+    Src = MIRBuilder.buildAnyExt(LLT::scalar(DstSize), Src).getReg(0);
+  }
+
+  MIRBuilder.buildCopy(Dst, Src);
+  return true;
+}
+
 bool InlineAsmLowering::lowerInlineAsm(
     MachineIRBuilder &MIRBuilder, const CallBase &Call,
     std::function(const Value &Val)> GetOrCreateVRegs)
@@ -427,7 +460,8 @@ bool InlineAsmLowering::lowerInlineAsm(
         ArrayRef SrcRegs = GetOrCreateVRegs(*OpInfo.CallOperandVal);
         assert(SrcRegs.size() == 1 && "Single register is expected here");
         Register Tmp = MRI->createVirtualRegister(RC);
-        MIRBuilder.buildCopy(Tmp, SrcRegs[0]);
+        if (!buildAnyextOrCopy(Tmp, SrcRegs[0], MIRBuilder))
+          return false;
 
         // Add Flag and input register operand (Tmp) to Inst. Tie Tmp to Def.
         unsigned UseFlag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, 1);
@@ -525,7 +559,8 @@ bool InlineAsmLowering::lowerInlineAsm(
 
       unsigned Flag = InlineAsm::getFlagWord(InlineAsm::Kind_RegUse, NumRegs);
       Inst.addImm(Flag);
-      MIRBuilder.buildCopy(OpInfo.Regs[0], SourceRegs[0]);
+      if (!buildAnyextOrCopy(OpInfo.Regs[0], SourceRegs[0], MIRBuilder))
+        return false;
       Inst.addReg(OpInfo.Regs[0]);
       break;
     }
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll
index f1be1011fa865..f8b23ef84721e 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/irtranslator-inline-asm.ll
@@ -211,3 +211,35 @@ define i32 @test_memory_constraint(i32* %a) nounwind {
   %1 = tail call i32 asm "ldr $0, $1", "=r,*m"(i32* %a)
   ret i32 %1
 }
+
+define i16 @test_anyext_input() {
+  ; CHECK-LABEL: name: test_anyext_input
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32common = COPY [[ANYEXT]](s32)
+  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 655370 /* regdef:GPR32common */, def %0, 9 /* reguse */, [[COPY]]
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY %0
+  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
+  ; CHECK:   $w0 = COPY [[ANYEXT1]](s32)
+  ; CHECK:   RET_ReallyLR implicit $w0
+  %1 = call i16 asm sideeffect "", "=r,r"(i16 1)
+  ret i16 %1
+}
+
+define i16 @test_anyext_input_with_matching_constraint() {
+  ; CHECK-LABEL: name: test_anyext_input_with_matching_constraint
+  ; CHECK: bb.1 (%ir-block.0):
+  ; CHECK:   [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 1
+  ; CHECK:   [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[C]](s16)
+  ; CHECK:   [[COPY:%[0-9]+]]:gpr32common = COPY [[ANYEXT]](s32)
+  ; CHECK:   INLINEASM &"", 1 /* sideeffect attdialect */, 655370 /* regdef:GPR32common */, def %0, 2147483657 /* reguse tiedto:$0 */, [[COPY]](tied-def 3)
+  ; CHECK:   [[COPY1:%[0-9]+]]:_(s32) = COPY %0
+  ; CHECK:   [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32)
+  ; CHECK:   [[ANYEXT1:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s16)
+  ; CHECK:   $w0 = COPY [[ANYEXT1]](s32)
+  ; CHECK:   RET_ReallyLR implicit $w0
+  %1 = call i16 asm sideeffect "", "=r,0"(i16 1)
+  ret i16 %1
+}

From e124062bf3874e1ce7ddad407b35e95ec3d3ac13 Mon Sep 17 00:00:00 2001
From: Ella Ma 
Date: Mon, 13 Jul 2020 12:22:16 +0300
Subject: [PATCH 072/771] Fix bad doxygen result for class
 clang::ento::CallEvent and its derived classes

Summary: Fix bug https://bugs.llvm.org/show_bug.cgi?id=44753. This
patch is a workaround of a Doxygen bug, so that it can correctly
generate documents for class clang::ento::CallEvent and its derived
classes.

Differential Revision: https://reviews.llvm.org/D82356
---
 .../include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
index d75f9f63286db..a2a98c558a4b7 100644
--- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
+++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h
@@ -554,7 +554,7 @@ class SimpleFunctionCall : public AnyFunctionCall {
 
 /// Represents a call to a block.
 ///
-/// Example: ^{ /* ... */ }()
+/// Example: ^{ statement-body }()
 class BlockCall : public CallEvent {
   friend class CallEventManager;
 

From 26cf6c1513f95082cedd4f2acf0881dd9ab56461 Mon Sep 17 00:00:00 2001
From: Haojian Wu 
Date: Mon, 13 Jul 2020 11:26:45 +0200
Subject: [PATCH 073/771] [clangd] Add metrics for recovery-expr type
 propagation.

Differential Revision: https://reviews.llvm.org/D83657
---
 clang-tools-extra/clangd/Selection.cpp                | 5 ++++-
 clang-tools-extra/clangd/unittests/SelectionTests.cpp | 2 ++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/clang-tools-extra/clangd/Selection.cpp b/clang-tools-extra/clangd/Selection.cpp
index 1db15ba6699a8..e94a3ca5a0c38 100644
--- a/clang-tools-extra/clangd/Selection.cpp
+++ b/clang-tools-extra/clangd/Selection.cpp
@@ -41,10 +41,13 @@ using ast_type_traits::DynTypedNode;
 void recordMetrics(const SelectionTree &S) {
   static constexpr trace::Metric SelectionUsedRecovery(
       "selection_recovery", trace::Metric::Distribution);
+  static constexpr trace::Metric RecoveryType("selection_recovery_type",
+                                              trace::Metric::Distribution);
   const auto *Common = S.commonAncestor();
   for (const auto *N = Common; N; N = N->Parent) {
-    if (N->ASTNode.get()) {
+    if (const auto *RE = N->ASTNode.get()) {
       SelectionUsedRecovery.record(1); // used recovery ast.
+      RecoveryType.record(RE->isTypeDependent() ? 0 : 1);
       return;
     }
   }
diff --git a/clang-tools-extra/clangd/unittests/SelectionTests.cpp b/clang-tools-extra/clangd/unittests/SelectionTests.cpp
index 6f8c10e966a88..051580ba6e49b 100644
--- a/clang-tools-extra/clangd/unittests/SelectionTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SelectionTests.cpp
@@ -453,6 +453,8 @@ TEST(SelectionTree, Metrics) {
   auto T = makeSelectionTree(Code, AST);
   EXPECT_THAT(Tracer.takeMetric("selection_recovery"),
               testing::ElementsAreArray({1}));
+  EXPECT_THAT(Tracer.takeMetric("selection_recovery_type"),
+              testing::ElementsAreArray({1}));
 }
 
 // FIXME: Doesn't select the binary operator node in

From 6ac9e589f869b6a63c9966e7c7ec7cc8207cf2f2 Mon Sep 17 00:00:00 2001
From: Haojian Wu 
Date: Thu, 9 Jul 2020 12:55:46 +0200
Subject: [PATCH 074/771] [clang][RecoveryExpr] Clarify the dependence-bits
 documentation.

The expr dependence-bits described that the expression somehow
depends on a template parameter.

With RecoveryExpr, we have generalized it to "the expression depends on
a template parameter, or an error".  This patch updates and cleans up all related
comments of dependence-bits.

Differential Revision: https://reviews.llvm.org/D83213
---
 clang/include/clang/AST/DependenceFlags.h | 31 +++++++++----
 clang/include/clang/AST/Expr.h            | 56 +++++++++++++++--------
 clang/lib/AST/ComputeDependence.cpp       | 13 ++++--
 clang/lib/Sema/SemaExpr.cpp               |  3 --
 4 files changed, 68 insertions(+), 35 deletions(-)

diff --git a/clang/include/clang/AST/DependenceFlags.h b/clang/include/clang/AST/DependenceFlags.h
index 3601cb90bb765..14a7ffaecb2b0 100644
--- a/clang/include/clang/AST/DependenceFlags.h
+++ b/clang/include/clang/AST/DependenceFlags.h
@@ -16,8 +16,18 @@ namespace clang {
 struct ExprDependenceScope {
   enum ExprDependence : uint8_t {
     UnexpandedPack = 1,
+    // This expr depends in any way on
+    //   - a template parameter, it implies that the resolution of this expr may
+    //     cause instantiation to fail
+    //   - or an error (often in a non-template context)
+    //
+    // Note that C++ standard doesn't define the instantiation-dependent term,
+    // we follow the formal definition coming from the Itanium C++ ABI, and
+    // extend it to errors.
     Instantiation = 2,
+    // The type of this expr depends on a template parameter, or an error.
     Type = 4,
+    // The value of this expr depends on a template parameter, or an error.
     Value = 8,
 
     // clang extension: this expr contains or references an error, and is
@@ -42,10 +52,14 @@ struct TypeDependenceScope {
     /// Whether this type contains an unexpanded parameter pack
     /// (for C++11 variadic templates)
     UnexpandedPack = 1,
-    /// Whether this type somehow involves a template parameter, even
-    /// if the resolution of the type does not depend on a template parameter.
+    /// Whether this type somehow involves
+    ///   - a template parameter, even if the resolution of the type does not
+    ///     depend on a template parameter.
+    ///   - or an error.
     Instantiation = 2,
-    /// Whether this type is a dependent type (C++ [temp.dep.type]).
+    /// Whether this type
+    ///   - is a dependent type (C++ [temp.dep.type])
+    ///   - or it somehow involves an error, e.g. decltype(recovery-expr)
     Dependent = 4,
     /// Whether this type is a variably-modified type (C99 6.7.5).
     VariablyModified = 8,
@@ -95,16 +109,17 @@ class Dependence {
 
     // Contains a template parameter pack that wasn't expanded.
     UnexpandedPack = 1,
-    // Uses a template parameter, even if it doesn't affect the result.
-    // Validity depends on the template parameter.
+    // Depends on a template parameter or an error in some way.
+    // Validity depends on how the template is instantiated or the error is
+    // resolved.
     Instantiation = 2,
-    // Expression type depends on template context.
+    // Expression type depends on template context, or an error.
     // Value and Instantiation should also be set.
     Type = 4,
-    // Expression value depends on template context.
+    // Expression value depends on template context, or an error.
     // Instantiation should also be set.
     Value = 8,
-    // Depends on template context.
+    // Depends on template context, or an error.
     // The type/value distinction is only meaningful for expressions.
     Dependent = Type | Value,
     // Includes an error, and depends on how it is resolved.
diff --git a/clang/include/clang/AST/Expr.h b/clang/include/clang/AST/Expr.h
index 66eafaaab715e..c13b971192850 100644
--- a/clang/include/clang/AST/Expr.h
+++ b/clang/include/clang/AST/Expr.h
@@ -157,9 +157,11 @@ class Expr : public ValueStmt {
     return static_cast(ExprBits.Dependent);
   }
 
-  /// isValueDependent - Determines whether this expression is
-  /// value-dependent (C++ [temp.dep.constexpr]). For example, the
-  /// array bound of "Chars" in the following example is
+  /// Determines whether the value of this expression depends on
+  ///   - a template parameter (C++ [temp.dep.constexpr])
+  ///   - or an error, whose resolution is unknown
+  ///
+  /// For example, the array bound of "Chars" in the following example is
   /// value-dependent.
   /// @code
   /// template struct meta_string;
@@ -168,10 +170,12 @@ class Expr : public ValueStmt {
     return static_cast(getDependence() & ExprDependence::Value);
   }
 
-  /// isTypeDependent - Determines whether this expression is
-  /// type-dependent (C++ [temp.dep.expr]), which means that its type
-  /// could change from one template instantiation to the next. For
-  /// example, the expressions "x" and "x + y" are type-dependent in
+  /// Determines whether the type of this expression depends on
+  ///   - a template paramter (C++ [temp.dep.expr], which means that its type
+  ///     could change from one template instantiation to the next)
+  ///   - or an error
+  ///
+  /// For example, the expressions "x" and "x + y" are type-dependent in
   /// the following code, but "y" is not type-dependent:
   /// @code
   /// template
@@ -184,8 +188,10 @@ class Expr : public ValueStmt {
   }
 
   /// Whether this expression is instantiation-dependent, meaning that
-  /// it depends in some way on a template parameter, even if neither its type
-  /// nor (constant) value can change due to the template instantiation.
+  /// it depends in some way on
+  ///    - a template parameter (even if neither its type nor (constant) value
+  ///      can change due to the template instantiation)
+  ///    - or an error
   ///
   /// In the following example, the expression \c sizeof(sizeof(T() + T())) is
   /// instantiation-dependent (since it involves a template parameter \c T), but
@@ -200,6 +206,12 @@ class Expr : public ValueStmt {
   /// }
   /// \endcode
   ///
+  /// \code
+  /// void func(int) {
+  ///   func(); // the expression is instantiation-dependent, because it depends
+  ///           // on an error.
+  /// }
+  /// \endcode
   bool isInstantiationDependent() const {
     return static_cast(getDependence() & ExprDependence::Instantiation);
   }
@@ -6212,19 +6224,25 @@ class TypoExpr : public Expr {
 /// subexpressions of some expression that we could not construct and source
 /// range covered by the expression.
 ///
-/// By default, RecoveryExpr is type-, value- and instantiation-dependent to
-/// take advantage of existing machinery to deal with dependent code in C++,
-/// e.g. RecoveryExpr is preserved in `decltype()` as part of the
-/// `DependentDecltypeType`. In addition to that, clang does not report most
-/// errors on dependent expressions, so we get rid of bogus errors for free.
-/// However, note that unlike other dependent expressions, RecoveryExpr can be
-/// produced in non-template contexts.
-/// In addition, we will preserve the type in RecoveryExpr when the type is
-/// known, e.g. preserving the return type for a broken non-overloaded function
-/// call, a overloaded call where all candidates have the same return type.
+/// By default, RecoveryExpr uses dependence-bits to take advantage of existing
+/// machinery to deal with dependent code in C++, e.g. RecoveryExpr is preserved
+/// in `decltype()` as part of the `DependentDecltypeType`. In
+/// addition to that, clang does not report most errors on dependent
+/// expressions, so we get rid of bogus errors for free. However, note that
+/// unlike other dependent expressions, RecoveryExpr can be produced in
+/// non-template contexts.
+///
+/// We will preserve the type in RecoveryExpr when the type is known, e.g.
+/// preserving the return type for a broken non-overloaded function call, a
+/// overloaded call where all candidates have the same return type. In this
+/// case, the expression is not type-dependent (unless the known type is itself
+/// dependent)
 ///
 /// One can also reliably suppress all bogus errors on expressions containing
 /// recovery expressions by examining results of Expr::containsErrors().
+///
+/// FIXME: RecoveryExpr is currently generated by default in C++ mode only, as
+/// dependence isn't handled properly on several C-only codepaths.
 class RecoveryExpr final : public Expr,
                            private llvm::TrailingObjects {
 public:
diff --git a/clang/lib/AST/ComputeDependence.cpp b/clang/lib/AST/ComputeDependence.cpp
index 53c43b194b38c..2333993dbeb40 100644
--- a/clang/lib/AST/ComputeDependence.cpp
+++ b/clang/lib/AST/ComputeDependence.cpp
@@ -495,13 +495,16 @@ ExprDependence clang::computeDependence(DeclRefExpr *E, const ASTContext &Ctx) {
 }
 
 ExprDependence clang::computeDependence(RecoveryExpr *E) {
-  // Mark the expression as value- and instantiation- dependent to reuse
-  // existing suppressions for dependent code, e.g. avoiding
-  // constant-evaluation.
-  // FIXME: drop type+value+instantiation once Error is sufficient to suppress
-  // bogus dianostics.
+  // RecoveryExpr is
+  //   - always value-dependent, and therefore instantiation dependent
+  //   - contains errors (ExprDependence::Error), by definition
+  //   - type-dependent if we don't know the type (fallback to an opaque
+  //     dependent type), or the type is known and dependent, or it has
+  //     type-dependent subexpressions.
   auto D = toExprDependence(E->getType()->getDependence()) |
            ExprDependence::ValueInstantiation | ExprDependence::Error;
+  // FIXME: remove the type-dependent bit from subexpressions, if the
+  // RecoveryExpr has a non-dependent type.
   for (auto *S : E->subExpressions())
     D |= S->getDependence();
   return D;
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index 24b9c6777be17..ccae79636f323 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -19180,9 +19180,6 @@ ExprResult Sema::ActOnObjCAvailabilityCheckExpr(
 
 ExprResult Sema::CreateRecoveryExpr(SourceLocation Begin, SourceLocation End,
                                     ArrayRef SubExprs, QualType T) {
-  // FIXME: enable it for C++, RecoveryExpr is type-dependent to suppress
-  // bogus diagnostics and this trick does not work in C.
-  // FIXME: use containsErrors() to suppress unwanted diags in C.
   if (!Context.getLangOpts().RecoveryAST)
     return ExprError();
 

From afcc9a81d2dce92f98732ecc90433141c346971a Mon Sep 17 00:00:00 2001
From: Kerry McLaughlin 
Date: Mon, 13 Jul 2020 10:08:40 +0100
Subject: [PATCH 075/771] [SVE][Codegen] Add a helper function for pointer
 increment logic

Summary:
Helper used when splitting load & store operations to calculate
the pointer + offset for the high half of the split

Reviewers: efriedma, sdesmalen, david-arm

Reviewed By: efriedma

Subscribers: tschuett, hiraditya, psnobl, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D83577
---
 llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h |  5 ++
 .../SelectionDAG/LegalizeVectorTypes.cpp      | 46 +++++++++----------
 2 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 4bc75ceb4928e..0fa6d653a8364 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -776,6 +776,11 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
   void GetSplitVector(SDValue Op, SDValue &Lo, SDValue &Hi);
   void SetSplitVector(SDValue Op, SDValue Lo, SDValue Hi);
 
+  // Helper function for incrementing the pointer when splitting
+  // memory operations
+  void IncrementPointer(MemSDNode *N, EVT MemVT,
+                        MachinePointerInfo &MPI, SDValue &Ptr);
+
   // Vector Result Splitting: <128 x ty> -> 2 x <64 x ty>.
   void SplitVectorResult(SDNode *N, unsigned ResNo);
   void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi);
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 550174f0df72a..414ba25ffd5ff 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -976,6 +976,25 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
     SetSplitVector(SDValue(N, ResNo), Lo, Hi);
 }
 
+void DAGTypeLegalizer::IncrementPointer(MemSDNode *N, EVT MemVT,
+                                        MachinePointerInfo &MPI,
+                                        SDValue &Ptr) {
+  SDLoc DL(N);
+  unsigned IncrementSize = MemVT.getSizeInBits().getKnownMinSize() / 8;
+
+  if (MemVT.isScalableVector()) {
+    SDValue BytesIncrement = DAG.getVScale(
+        DL, Ptr.getValueType(),
+        APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize));
+    MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
+    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement);
+  } else {
+    MPI = N->getPointerInfo().getWithOffset(IncrementSize);
+    // Increment the pointer to the other half.
+    Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
+  }
+}
+
 void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo,
                                          SDValue &Hi) {
   SDValue LHSLo, LHSHi;
@@ -1537,19 +1556,8 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
                    LD->getPointerInfo(), LoMemVT, LD->getOriginalAlign(),
                    MMOFlags, AAInfo);
 
-  unsigned IncrementSize = LoMemVT.getSizeInBits().getKnownMinSize() / 8;
-
   MachinePointerInfo MPI;
-  if (LoVT.isScalableVector()) {
-    SDValue BytesIncrement = DAG.getVScale(
-        dl, Ptr.getValueType(),
-        APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize));
-    MPI = MachinePointerInfo(LD->getPointerInfo().getAddrSpace());
-    Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, BytesIncrement);
-  } else {
-    MPI = LD->getPointerInfo().getWithOffset(IncrementSize);
-    Ptr = DAG.getObjectPtrOffset(dl, Ptr, IncrementSize);
-  }
+  IncrementPointer(LD, LoMemVT, MPI, Ptr);
 
   Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, MPI,
                    HiMemVT, LD->getOriginalAlign(), MMOFlags, AAInfo);
@@ -2489,8 +2497,6 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
   if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized())
     return TLI.scalarizeVectorStore(N, DAG);
 
-  unsigned IncrementSize = LoMemVT.getSizeInBits().getKnownMinSize() / 8;
-
   if (isTruncating)
     Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), LoMemVT,
                            Alignment, MMOFlags, AAInfo);
@@ -2499,17 +2505,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) {
                       AAInfo);
 
   MachinePointerInfo MPI;
-  if (LoMemVT.isScalableVector()) {
-    SDValue BytesIncrement = DAG.getVScale(
-        DL, Ptr.getValueType(),
-        APInt(Ptr.getValueSizeInBits().getFixedSize(), IncrementSize));
-    MPI = MachinePointerInfo(N->getPointerInfo().getAddrSpace());
-    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, BytesIncrement);
-  } else {
-    MPI = N->getPointerInfo().getWithOffset(IncrementSize);
-    // Increment the pointer to the other half.
-    Ptr = DAG.getObjectPtrOffset(DL, Ptr, IncrementSize);
-  }
+  IncrementPointer(N, LoMemVT, MPI, Ptr);
 
   if (isTruncating)
     Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, MPI,

From 1d3d9c7b589e6471edc0d8a2272c399d2ce6a13c Mon Sep 17 00:00:00 2001
From: Haojian Wu 
Date: Mon, 13 Jul 2020 12:05:09 +0200
Subject: [PATCH 076/771] [clang] Include type specifiers in typo correction
 when checking isCXXDeclarationSpecifiers.

- add more tests (the test added in https://github.com/llvm/llvm-project/commit/2f448467e4254ddc3191136c968e6054bc009b88 is weak);
- improve the `MyTemplate();` case, with this patch, typo correction
  suggests the type decl, and no regressions found.

Differential Revision: https://reviews.llvm.org/D83025
---
 clang/lib/Parse/ParseTentative.cpp      |  5 ++--
 clang/test/Parser/cxx-template-decl.cpp | 14 ----------
 clang/test/SemaCXX/typo-correction.cpp  | 35 +++++++++++++++++++++++++
 3 files changed, 38 insertions(+), 16 deletions(-)

diff --git a/clang/lib/Parse/ParseTentative.cpp b/clang/lib/Parse/ParseTentative.cpp
index 948162c11b3ff..f026f3a1bfb29 100644
--- a/clang/lib/Parse/ParseTentative.cpp
+++ b/clang/lib/Parse/ParseTentative.cpp
@@ -1110,8 +1110,9 @@ class TentativeParseCCC final : public CorrectionCandidateCallback {
 public:
   TentativeParseCCC(const Token &Next) {
     WantRemainingKeywords = false;
-    WantTypeSpecifiers = Next.isOneOf(tok::l_paren, tok::r_paren, tok::greater,
-                                      tok::l_brace, tok::identifier);
+    WantTypeSpecifiers =
+        Next.isOneOf(tok::l_paren, tok::r_paren, tok::greater, tok::l_brace,
+                     tok::identifier, tok::comma);
   }
 
   bool ValidateCandidate(const TypoCorrection &Candidate) override {
diff --git a/clang/test/Parser/cxx-template-decl.cpp b/clang/test/Parser/cxx-template-decl.cpp
index 24cc13cde91fa..64e7ca921f575 100644
--- a/clang/test/Parser/cxx-template-decl.cpp
+++ b/clang/test/Parser/cxx-template-decl.cpp
@@ -286,17 +286,3 @@ namespace PR45239 {
   template int b;
   template auto f() -> b<0>; // expected-error +{{}}
 }
-
-namespace NoCrashOnNullNNSTypoCorrection {
-
-int AddObservation(); // expected-note {{declared here}}
-
-template  // expected-note {{template parameter is declared here}}
-class UsingImpl {};
-class AddObservation {
-  using Using =
-    UsingImpl; // expected-error {{use of undeclared identifier 'AddObservationFn'; did you mean}} \
-                                               expected-error {{template argument for template type parameter must be a type}}
-};
-
-}
diff --git a/clang/test/SemaCXX/typo-correction.cpp b/clang/test/SemaCXX/typo-correction.cpp
index 92a145074e728..e0325b3ba09bf 100644
--- a/clang/test/SemaCXX/typo-correction.cpp
+++ b/clang/test/SemaCXX/typo-correction.cpp
@@ -611,6 +611,41 @@ int bar() {
 }
 }
 
+namespace testIncludeTypeInTemplateArgument {
+template 
+void foo(T t = {}, U = {}); // expected-note {{candidate template ignored}}
+
+class AddObservation {}; // expected-note {{declared here}}
+int bar1() {
+  // should resolve to a class.
+  foo(); // expected-error {{unknown type name 'AddObservationFn'; did you mean 'AddObservation'?}}
+
+  // should not resolve to a class.
+  foo(AddObservationFn, 1);    // expected-error-re {{use of undeclared identifier 'AddObservationFn'{{$}}}}
+  int a = AddObservationFn, b; // expected-error-re {{use of undeclared identifier 'AddObservationFn'{{$}}}}
+
+  int AddObservation; // expected-note 3{{declared here}}
+  // should resolve to a local variable.
+  foo(AddObservationFn, 1);    // expected-error {{use of undeclared identifier 'AddObservationFn'; did you mean}}
+  int c = AddObservationFn, d; // expected-error {{use of undeclared identifier 'AddObservationFn'; did you mean}}
+
+  // FIXME: would be nice to not resolve to a variable.
+  foo(); // expected-error {{use of undeclared identifier 'AddObservationFn'; did you mean}} \
+                                   expected-error {{no matching function for call}}
+}
+} // namespace testIncludeTypeInTemplateArgument
+
+namespace testNoCrashOnNullNNSTypoCorrection {
+int AddObservation();
+template 
+class UsingImpl {};
+class AddObservation { // expected-note {{declared here}}
+  using Using =
+      // should resolve to a class.
+      UsingImpl; // expected-error {{unknown type name 'AddObservationFn'; did you mean}}
+};
+} // namespace testNoCrashOnNullNNSTypoCorrection
+
 namespace testNonStaticMemberHandling {
 struct Foo {
   bool usesMetadata;  // expected-note {{'usesMetadata' declared here}}

From ce23e54162edcb0de9b7af1f457229da0bbfe79d Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin 
Date: Mon, 13 Jul 2020 11:44:18 +0200
Subject: [PATCH 077/771] [AMDGPU][GlobalISel] Select llvm.amdgcn.ballot

Select ballot intrinsic for GlobalISel.

Differential Revision: https://reviews.llvm.org/D83214
---
 .../AMDGPU/AMDGPUInstructionSelector.cpp      | 36 +++++++++
 .../Target/AMDGPU/AMDGPUInstructionSelector.h |  1 +
 .../Target/AMDGPU/AMDGPURegisterBankInfo.cpp  |  8 ++
 .../GlobalISel/llvm.amdgcn.ballot.i32.ll      | 77 +++++++++++++++++++
 .../GlobalISel/llvm.amdgcn.ballot.i64.ll      | 73 ++++++++++++++++++
 .../CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll  | 64 ++++++---------
 .../CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll  | 68 +++++++---------
 7 files changed, 246 insertions(+), 81 deletions(-)
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
 create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index c3d5e78964c87..84734365cc658 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -891,6 +891,8 @@ bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const {
     return selectDivScale(I);
   case Intrinsic::amdgcn_icmp:
     return selectIntrinsicIcmp(I);
+  case Intrinsic::amdgcn_ballot:
+    return selectBallot(I);
   default:
     return selectImpl(I, *CoverageInfo);
   }
@@ -1039,6 +1041,40 @@ bool AMDGPUInstructionSelector::selectIntrinsicIcmp(MachineInstr &I) const {
   return Ret;
 }
 
+bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
+  MachineBasicBlock *BB = I.getParent();
+  const DebugLoc &DL = I.getDebugLoc();
+  Register DstReg = I.getOperand(0).getReg();
+  const unsigned Size = MRI->getType(DstReg).getSizeInBits();
+  const bool Is64 = Size == 64;
+
+  if (Size != STI.getWavefrontSize())
+    return false;
+
+  Optional Arg =
+      getConstantVRegValWithLookThrough(I.getOperand(2).getReg(), *MRI, true);
+
+  if (Arg.hasValue()) {
+    const int64_t Value = Arg.getValue().Value;
+    if (Value == 0) {
+      unsigned Opcode = Is64 ? AMDGPU::S_MOV_B64 : AMDGPU::S_MOV_B32;
+      BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0);
+    } else if (Value == -1) { // all ones
+      Register SrcReg = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
+      const unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
+      BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
+          .addReg(SrcReg, 0, SubReg);
+    } else
+      return false;
+  } else {
+    Register SrcReg = I.getOperand(2).getReg();
+    BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg);
+  }
+
+  I.eraseFromParent();
+  return true;
+}
+
 bool AMDGPUInstructionSelector::selectEndCfIntrinsic(MachineInstr &MI) const {
   // FIXME: Manually selecting to avoid dealiing with the SReg_1 trick
   // SelectionDAG uses for wave32 vs wave64.
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
index f8a8b5db4b556..1fe80958917d6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h
@@ -107,6 +107,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
   bool selectInterpP1F16(MachineInstr &MI) const;
   bool selectDivScale(MachineInstr &MI) const;
   bool selectIntrinsicIcmp(MachineInstr &MI) const;
+  bool selectBallot(MachineInstr &I) const;
   bool selectG_INTRINSIC(MachineInstr &I) const;
 
   bool selectEndCfIntrinsic(MachineInstr &MI) const;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
index 56bc0c44779d8..dfaf97bfb08e7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp
@@ -2989,6 +2989,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl(
       constrainOpWithReadfirstlane(MI, MRI, 3); // Index
       return;
     }
+    case Intrinsic::amdgcn_ballot:
     case Intrinsic::amdgcn_interp_p1:
     case Intrinsic::amdgcn_interp_p2:
     case Intrinsic::amdgcn_interp_mov:
@@ -4160,6 +4161,13 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const {
       OpdsMapping[M0Idx] = AMDGPU::getValueMapping(M0Bank, 32);
       break;
     }
+    case Intrinsic::amdgcn_ballot: {
+      unsigned DstSize = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits();
+      unsigned SrcSize = MRI.getType(MI.getOperand(2).getReg()).getSizeInBits();
+      OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, DstSize);
+      OpdsMapping[2] = AMDGPU::getValueMapping(AMDGPU::VCCRegBankID, SrcSize);
+      break;
+    }
     }
     break;
   }
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
new file mode 100644
index 0000000000000..b15fbf64fd8e2
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -global-isel < %s | FileCheck %s
+
+declare i32 @llvm.amdgcn.ballot.i32(i1)
+
+; Test ballot(0)
+
+define amdgpu_cs i32 @constant_false() {
+; CHECK-LABEL: constant_false:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    ; implicit-def: $vcc_hi
+; CHECK-NEXT:    ; return to shader part epilog
+  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 0)
+  ret i32 %ballot
+}
+
+; Test ballot(1)
+
+define amdgpu_cs i32 @constant_true() {
+; CHECK-LABEL: constant_true:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, exec_lo
+; CHECK-NEXT:    ; implicit-def: $vcc_hi
+; CHECK-NEXT:    ; return to shader part epilog
+  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 1)
+  ret i32 %ballot
+}
+
+; Test ballot of a non-comparison operation
+
+define amdgpu_cs i32 @non_compare(i32 %x) {
+; CHECK-LABEL: non_compare:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT:    ; implicit-def: $vcc_hi
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
+; CHECK-NEXT:    ; return to shader part epilog
+  %trunc = trunc i32 %x to i1
+  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %trunc)
+  ret i32 %ballot
+}
+
+; Test ballot of comparisons
+
+define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
+; CHECK-LABEL: compare_ints:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s0, v0, v1
+; CHECK-NEXT:    ; implicit-def: $vcc_hi
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = icmp eq i32 %x, %y
+  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
+  ret i32 %ballot
+}
+
+define amdgpu_cs i32 @compare_int_with_constant(i32 %x) {
+; CHECK-LABEL: compare_int_with_constant:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_le_i32_e64 s0, 0x63, v0
+; CHECK-NEXT:    ; implicit-def: $vcc_hi
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = icmp sge i32 %x, 99
+  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
+  ret i32 %ballot
+}
+
+define amdgpu_cs i32 @compare_floats(float %x, float %y) {
+; CHECK-LABEL: compare_floats:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
+; CHECK-NEXT:    ; implicit-def: $vcc_hi
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = fcmp ogt float %x, %y
+  %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
+  ret i32 %ballot
+}
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
new file mode 100644
index 0000000000000..fcea5f8c9c59e
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
@@ -0,0 +1,73 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel < %s | FileCheck %s
+
+declare i64 @llvm.amdgcn.ballot.i64(i1)
+
+; Test ballot(0)
+
+define amdgpu_cs i64 @constant_false() {
+; CHECK-LABEL: constant_false:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    ; return to shader part epilog
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 0)
+  ret i64 %ballot
+}
+
+; Test ballot(1)
+
+define amdgpu_cs i64 @constant_true() {
+; CHECK-LABEL: constant_true:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_mov_b64 s[0:1], exec
+; CHECK-NEXT:    ; return to shader part epilog
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 1)
+  ret i64 %ballot
+}
+
+; Test ballot of a non-comparison operation
+
+define amdgpu_cs i64 @non_compare(i32 %x) {
+; CHECK-LABEL: non_compare:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v0
+; CHECK-NEXT:    ; return to shader part epilog
+  %trunc = trunc i32 %x to i1
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %trunc)
+  ret i64 %ballot
+}
+
+; Test ballot of comparisons
+
+define amdgpu_cs i64 @compare_ints(i32 %x, i32 %y) {
+; CHECK-LABEL: compare_ints:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[0:1], v0, v1
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = icmp eq i32 %x, %y
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+  ret i64 %ballot
+}
+
+define amdgpu_cs i64 @compare_int_with_constant(i32 %x) {
+; CHECK-LABEL: compare_int_with_constant:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0x63
+; CHECK-NEXT:    v_cmp_ge_i32_e64 s[0:1], v0, v1
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = icmp sge i32 %x, 99
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+  ret i64 %ballot
+}
+
+define amdgpu_cs i64 @compare_floats(float %x, float %y) {
+; CHECK-LABEL: compare_floats:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    v_cmp_gt_f32_e64 s[0:1], v0, v1
+; CHECK-NEXT:    ; return to shader part epilog
+  %cmp = fcmp ogt float %x, %y
+  %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
+  ret i64 %ballot
+}
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
index c2aa65bddb7e0..20ef90db98319 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i32.ll
@@ -5,44 +5,37 @@ declare i32 @llvm.amdgcn.ballot.i32(i1)
 
 ; Test ballot(0)
 
-define i32 @test0() {
-; CHECK-LABEL: test0:
+define amdgpu_cs i32 @constant_false() {
+; CHECK-LABEL: constant_false:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT:    v_mov_b32_e32 v0, 0
+; CHECK-NEXT:    s_mov_b32 s0, 0
 ; CHECK-NEXT:    ; implicit-def: $vcc_hi
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:    ; return to shader part epilog
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 0)
   ret i32 %ballot
 }
 
 ; Test ballot(1)
 
-define i32 @test1() {
-; CHECK-LABEL: test1:
+define amdgpu_cs i32 @constant_true() {
+; CHECK-LABEL: constant_true:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT:    v_mov_b32_e32 v0, exec_lo
+; CHECK-NEXT:    s_mov_b32 s0, exec_lo
 ; CHECK-NEXT:    ; implicit-def: $vcc_hi
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:    ; return to shader part epilog
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 1)
   ret i32 %ballot
 }
 
 ; Test ballot of a non-comparison operation
 
-define i32 @test2(i32 %x) {
-; CHECK-LABEL: test2:
+define amdgpu_cs i32 @non_compare(i32 %x) {
+; CHECK-LABEL: non_compare:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_waitcnt_vscnt null, 0x0
 ; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
 ; CHECK-NEXT:    ; implicit-def: $vcc_hi
-; CHECK-NEXT:    v_cmp_ne_u32_e64 s4, 0, v0
-; CHECK-NEXT:    v_mov_b32_e32 v0, s4
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s0, 0, v0
+; CHECK-NEXT:    ; return to shader part epilog
   %trunc = trunc i32 %x to i1
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %trunc)
   ret i32 %ballot
@@ -50,43 +43,34 @@ define i32 @test2(i32 %x) {
 
 ; Test ballot of comparisons
 
-define i32 @test3(i32 %x, i32 %y) {
-; CHECK-LABEL: test3:
+define amdgpu_cs i32 @compare_ints(i32 %x, i32 %y) {
+; CHECK-LABEL: compare_ints:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s4, v0, v1
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s0, v0, v1
 ; CHECK-NEXT:    ; implicit-def: $vcc_hi
-; CHECK-NEXT:    v_mov_b32_e32 v0, s4
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:    ; return to shader part epilog
   %cmp = icmp eq i32 %x, %y
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
   ret i32 %ballot
 }
 
-define i32 @test4(i32 %x) {
-; CHECK-LABEL: test4:
+define amdgpu_cs i32 @compare_int_with_constant(i32 %x) {
+; CHECK-LABEL: compare_int_with_constant:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT:    v_cmp_lt_i32_e64 s4, 0x62, v0
+; CHECK-NEXT:    v_cmp_lt_i32_e64 s0, 0x62, v0
 ; CHECK-NEXT:    ; implicit-def: $vcc_hi
-; CHECK-NEXT:    v_mov_b32_e32 v0, s4
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:    ; return to shader part epilog
   %cmp = icmp sge i32 %x, 99
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
   ret i32 %ballot
 }
 
-define i32 @test5(float %x, float %y) {
-; CHECK-LABEL: test5:
+define amdgpu_cs i32 @compare_floats(float %x, float %y) {
+; CHECK-LABEL: compare_floats:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_waitcnt_vscnt null, 0x0
-; CHECK-NEXT:    v_cmp_gt_f32_e64 s4, v0, v1
+; CHECK-NEXT:    v_cmp_gt_f32_e64 s0, v0, v1
 ; CHECK-NEXT:    ; implicit-def: $vcc_hi
-; CHECK-NEXT:    v_mov_b32_e32 v0, s4
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:    ; return to shader part epilog
   %cmp = fcmp ogt float %x, %y
   %ballot = call i32 @llvm.amdgcn.ballot.i32(i1 %cmp)
   ret i32 %ballot
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
index 97678bf309cbc..69066011a56c4 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.ballot.i64.ll
@@ -5,41 +5,36 @@ declare i64 @llvm.amdgcn.ballot.i64(i1)
 
 ; Test ballot(0)
 
-define i64 @test0() {
-; CHECK-LABEL: test0:
+define amdgpu_cs i64 @constant_false() {
+; CHECK-LABEL: constant_false:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_mov_b32_e32 v0, 0
-; CHECK-NEXT:    v_mov_b32_e32 v1, 0
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:    s_mov_b32 s0, 0
+; CHECK-NEXT:    s_mov_b32 s1, 0
+; CHECK-NEXT:    ; return to shader part epilog
   %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 0)
   ret i64 %ballot
 }
 
 ; Test ballot(1)
 
-define i64 @test1() {
-; CHECK-LABEL: test1:
+define amdgpu_cs i64 @constant_true() {
+; CHECK-LABEL: constant_true:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_mov_b32_e32 v0, exec_lo
-; CHECK-NEXT:    v_mov_b32_e32 v1, exec_hi
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:    s_mov_b32 s0, exec_lo
+; CHECK-NEXT:    s_mov_b32 s1, exec_hi
+; CHECK-NEXT:    ; return to shader part epilog
   %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 1)
   ret i64 %ballot
 }
 
 ; Test ballot of a non-comparison operation
 
-define i64 @test2(i32 %x) {
-; CHECK-LABEL: test2:
+define amdgpu_cs i64 @non_compare(i32 %x) {
+; CHECK-LABEL: non_compare:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; CHECK-NEXT:    v_and_b32_e32 v0, 1, v0
-; CHECK-NEXT:    v_cmp_ne_u32_e64 s[4:5], 0, v0
-; CHECK-NEXT:    v_mov_b32_e32 v0, s4
-; CHECK-NEXT:    v_mov_b32_e32 v1, s5
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:    v_cmp_ne_u32_e64 s[0:1], 0, v0
+; CHECK-NEXT:    ; return to shader part epilog
   %trunc = trunc i32 %x to i1
   %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %trunc)
   ret i64 %ballot
@@ -47,41 +42,32 @@ define i64 @test2(i32 %x) {
 
 ; Test ballot of comparisons
 
-define i64 @test3(i32 %x, i32 %y) {
-; CHECK-LABEL: test3:
+define amdgpu_cs i64 @compare_ints(i32 %x, i32 %y) {
+; CHECK-LABEL: compare_ints:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_eq_u32_e64 s[4:5], v0, v1
-; CHECK-NEXT:    v_mov_b32_e32 v0, s4
-; CHECK-NEXT:    v_mov_b32_e32 v1, s5
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:    v_cmp_eq_u32_e64 s[0:1], v0, v1
+; CHECK-NEXT:    ; return to shader part epilog
   %cmp = icmp eq i32 %x, %y
   %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
   ret i64 %ballot
 }
 
-define i64 @test4(i32 %x) {
-; CHECK-LABEL: test4:
+define amdgpu_cs i64 @compare_int_with_constant(i32 %x) {
+; CHECK-LABEL: compare_int_with_constant:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    s_movk_i32 s4, 0x62
-; CHECK-NEXT:    v_cmp_lt_i32_e64 s[4:5], s4, v0
-; CHECK-NEXT:    v_mov_b32_e32 v0, s4
-; CHECK-NEXT:    v_mov_b32_e32 v1, s5
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:    s_movk_i32 s0, 0x62
+; CHECK-NEXT:    v_cmp_lt_i32_e64 s[0:1], s0, v0
+; CHECK-NEXT:    ; return to shader part epilog
   %cmp = icmp sge i32 %x, 99
   %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
   ret i64 %ballot
 }
 
-define i64 @test5(float %x, float %y) {
-; CHECK-LABEL: test5:
+define amdgpu_cs i64 @compare_floats(float %x, float %y) {
+; CHECK-LABEL: compare_floats:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; CHECK-NEXT:    v_cmp_gt_f32_e64 s[4:5], v0, v1
-; CHECK-NEXT:    v_mov_b32_e32 v0, s4
-; CHECK-NEXT:    v_mov_b32_e32 v1, s5
-; CHECK-NEXT:    s_setpc_b64 s[30:31]
+; CHECK-NEXT:    v_cmp_gt_f32_e64 s[0:1], v0, v1
+; CHECK-NEXT:    ; return to shader part epilog
   %cmp = fcmp ogt float %x, %y
   %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 %cmp)
   ret i64 %ballot

From 3bffe6022cc96a38ad0f6ada5f5b7b41eca5796e Mon Sep 17 00:00:00 2001
From: Benjamin Kramer 
Date: Mon, 13 Jul 2020 12:23:53 +0200
Subject: [PATCH 078/771] [mlir][VectorOps] Lower vector.fma to llvm.fmuladd
 instead of llvm.fma

Summary:
These are semantically equivalent, but fmuladd allows decaying the op
into fmul+fadd if there is no fma instruction available. llvm.fma lowers
to scalar calls to libm fmaf, which is a lot slower.

Reviewers: nicolasvasilache, aartbik, ftynse

Subscribers: mehdi_amini, rriddle, jpienaar, shauheen, antiagainst, arpith-jacob, mgester, lucyrfox, liufengdb, stephenneuendorffer, Joonsoo, grosul1, Kayjukh, jurahul, msifontes

Tags: #mlir

Differential Revision: https://reviews.llvm.org/D83666
---
 .../Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp    |  6 +++---
 mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir  | 10 +++++-----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
index 96a8fa4c6f223..2be2bd9bb7d02 100644
--- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
+++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp
@@ -481,7 +481,7 @@ class VectorExtractOpConversion : public ConvertToLLVMPattern {
 /// ```
 /// is converted to:
 /// ```
-///  llvm.intr.fma %va, %va, %va:
+///  llvm.intr.fmuladd %va, %va, %va:
 ///    (!llvm<"<8 x float>">, !llvm<"<8 x float>">, !llvm<"<8 x float>">)
 ///    -> !llvm<"<8 x float>">
 /// ```
@@ -500,8 +500,8 @@ class VectorFMAOp1DConversion : public ConvertToLLVMPattern {
     VectorType vType = fmaOp.getVectorType();
     if (vType.getRank() != 1)
       return failure();
-    rewriter.replaceOpWithNewOp(op, adaptor.lhs(), adaptor.rhs(),
-                                             adaptor.acc());
+    rewriter.replaceOpWithNewOp(op, adaptor.lhs(),
+                                                 adaptor.rhs(), adaptor.acc());
     return success();
   }
 };
diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
index 09162aa0236ba..829edf5f66f17 100644
--- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
+++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir
@@ -236,7 +236,7 @@ func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: vector
 //      CHECK: %[[T5:.*]] = llvm.insertelement %[[T2]], %[[T3]][%[[T4]] : !llvm.i32] : !llvm<"<3 x float>">
 //      CHECK: %[[T6:.*]] = llvm.shufflevector %[[T5]], %[[T3]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>">
 //      CHECK: %[[T7:.*]] = llvm.extractvalue %[[C]][0] : !llvm<"[2 x <3 x float>]">
-//      CHECK: %[[T8:.*]] = "llvm.intr.fma"(%[[T6]], %[[B]], %[[T7]]) : (!llvm<"<3 x float>">, !llvm<"<3 x float>">, !llvm<"<3 x float>">)
+//      CHECK: %[[T8:.*]] = "llvm.intr.fmuladd"(%[[T6]], %[[B]], %[[T7]]) : (!llvm<"<3 x float>">, !llvm<"<3 x float>">, !llvm<"<3 x float>">)
 //      CHECK: %[[T9:.*]] = llvm.insertvalue %[[T8]], %[[T0]][0] : !llvm<"[2 x <3 x float>]">
 //      CHECK: %[[T10:.*]] = llvm.mlir.constant(1 : i64) : !llvm.i64
 //      CHECK: %[[T11:.*]] = llvm.extractelement %[[A]][%[[T10]] : !llvm.i64] : !llvm<"<2 x float>">
@@ -245,7 +245,7 @@ func @outerproduct_add(%arg0: vector<2xf32>, %arg1: vector<3xf32>, %arg2: vector
 //      CHECK: %[[T14:.*]] = llvm.insertelement %[[T11]], %[[T12]][%[[T13]] : !llvm.i32] : !llvm<"<3 x float>">
 //      CHECK: %[[T15:.*]] = llvm.shufflevector %[[T14]], %[[T12]] [0 : i32, 0 : i32, 0 : i32] : !llvm<"<3 x float>">, !llvm<"<3 x float>">
 //      CHECK: %[[T16:.*]] = llvm.extractvalue %[[C]][1] : !llvm<"[2 x <3 x float>]">
-//      CHECK: %[[T17:.*]] = "llvm.intr.fma"(%[[T15]], %[[B]], %[[T16]]) : (!llvm<"<3 x float>">, !llvm<"<3 x float>">, !llvm<"<3 x float>">)
+//      CHECK: %[[T17:.*]] = "llvm.intr.fmuladd"(%[[T15]], %[[B]], %[[T16]]) : (!llvm<"<3 x float>">, !llvm<"<3 x float>">, !llvm<"<3 x float>">)
 //      CHECK: %[[T18:.*]] = llvm.insertvalue %[[T17]], %[[T9]][1] : !llvm<"[2 x <3 x float>]">
 //      CHECK: llvm.return %[[T18]] : !llvm<"[2 x <3 x float>]">
 
@@ -688,20 +688,20 @@ func @extract_strides(%arg0: vector<3x3xf32>) -> vector<1x1xf32> {
 //  CHECK-SAME: %[[A:.*]]: !llvm<"<8 x float>">, %[[B:.*]]: !llvm<"[2 x <4 x float>]">)
 //  CHECK-SAME: -> !llvm<"{ <8 x float>, [2 x <4 x float>] }"> {
 func @vector_fma(%a: vector<8xf32>, %b: vector<2x4xf32>) -> (vector<8xf32>, vector<2x4xf32>) {
-  //         CHECK: "llvm.intr.fma"(%[[A]], %[[A]], %[[A]]) :
+  //         CHECK: "llvm.intr.fmuladd"(%[[A]], %[[A]], %[[A]]) :
   //    CHECK-SAME:   (!llvm<"<8 x float>">, !llvm<"<8 x float>">, !llvm<"<8 x float>">) -> !llvm<"<8 x float>">
   %0 = vector.fma %a, %a, %a : vector<8xf32>
 
   //       CHECK: %[[b00:.*]] = llvm.extractvalue %[[B]][0] : !llvm<"[2 x <4 x float>]">
   //       CHECK: %[[b01:.*]] = llvm.extractvalue %[[B]][0] : !llvm<"[2 x <4 x float>]">
   //       CHECK: %[[b02:.*]] = llvm.extractvalue %[[B]][0] : !llvm<"[2 x <4 x float>]">
-  //       CHECK: %[[B0:.*]] = "llvm.intr.fma"(%[[b00]], %[[b01]], %[[b02]]) :
+  //       CHECK: %[[B0:.*]] = "llvm.intr.fmuladd"(%[[b00]], %[[b01]], %[[b02]]) :
   //  CHECK-SAME: (!llvm<"<4 x float>">, !llvm<"<4 x float>">, !llvm<"<4 x float>">) -> !llvm<"<4 x float>">
   //       CHECK: llvm.insertvalue %[[B0]], {{.*}}[0] : !llvm<"[2 x <4 x float>]">
   //       CHECK: %[[b10:.*]] = llvm.extractvalue %[[B]][1] : !llvm<"[2 x <4 x float>]">
   //       CHECK: %[[b11:.*]] = llvm.extractvalue %[[B]][1] : !llvm<"[2 x <4 x float>]">
   //       CHECK: %[[b12:.*]] = llvm.extractvalue %[[B]][1] : !llvm<"[2 x <4 x float>]">
-  //       CHECK: %[[B1:.*]] = "llvm.intr.fma"(%[[b10]], %[[b11]], %[[b12]]) :
+  //       CHECK: %[[B1:.*]] = "llvm.intr.fmuladd"(%[[b10]], %[[b11]], %[[b12]]) :
   //  CHECK-SAME: (!llvm<"<4 x float>">, !llvm<"<4 x float>">, !llvm<"<4 x float>">) -> !llvm<"<4 x float>">
   //       CHECK: llvm.insertvalue %[[B1]], {{.*}}[1] : !llvm<"[2 x <4 x float>]">
   %1 = vector.fma %b, %b, %b : vector<2x4xf32>

From 6050c156ab4f13a3c54ca6ec297a72ece95966d7 Mon Sep 17 00:00:00 2001
From: Anastasia Stulova 
Date: Mon, 13 Jul 2020 11:30:13 +0100
Subject: [PATCH 079/771] [OpenCL] Defer addr space deduction for dependent
 type.

This patch removes the deduction of address spaces during parsing
for types that depend on a template parameter, even if an
address space is already known. Deducing it early interferes
with the template instantiation/specialization logic, which uses
the source address space where no address space is present.

Address space deduction for templates is therefore fully
moved to the template instantiation/specialization phase.

Patch by Ole Strohm (olestrohm)!

Tags: #clang

Differential Revision: https://reviews.llvm.org/D82781
---
 clang/lib/Sema/SemaDecl.cpp                     |  3 +++
 clang/lib/Sema/SemaTemplateInstantiateDecl.cpp  |  6 ++++++
 .../SemaOpenCLCXX/address-space-deduction.cl    | 17 ++++++++++++-----
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp
index f5e375134c293..3e2b61ae8cdf6 100644
--- a/clang/lib/Sema/SemaDecl.cpp
+++ b/clang/lib/Sema/SemaDecl.cpp
@@ -6290,6 +6290,8 @@ bool Sema::inferObjCARCLifetime(ValueDecl *decl) {
 void Sema::deduceOpenCLAddressSpace(ValueDecl *Decl) {
   if (Decl->getType().hasAddressSpace())
     return;
+  if (Decl->getType()->isDependentType())
+    return;
   if (VarDecl *Var = dyn_cast(Decl)) {
     QualType Type = Var->getType();
     if (Type->isSamplerT() || Type->isVoidType())
@@ -7859,6 +7861,7 @@ void Sema::CheckVariableDeclarationType(VarDecl *NewVD) {
     if (NewVD->isFileVarDecl() || NewVD->isStaticLocal() ||
         NewVD->hasExternalStorage()) {
       if (!T->isSamplerT() &&
+          !T->isDependentType() &&
           !(T.getAddressSpace() == LangAS::opencl_constant ||
             (T.getAddressSpace() == LangAS::opencl_global &&
              (getLangOpts().OpenCLVersion == 200 ||
diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
index 85adc4ef2dbde..2efb7acb97245 100644
--- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
+++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp
@@ -3625,6 +3625,9 @@ Decl *TemplateDeclInstantiator::VisitVarTemplateSpecializationDecl(
   if (InsertPos)
     VarTemplate->AddSpecialization(Var, InsertPos);
 
+  if (SemaRef.getLangOpts().OpenCL)
+    SemaRef.deduceOpenCLAddressSpace(Var);
+
   // Substitute the nested name specifier, if any.
   if (SubstQualifier(D, Var))
     return nullptr;
@@ -4895,6 +4898,9 @@ VarTemplateSpecializationDecl *Sema::CompleteVarTemplateSpecializationDecl(
   // Instantiate the initializer.
   InstantiateVariableInitializer(VarSpec, PatternDecl, TemplateArgs);
 
+  if (getLangOpts().OpenCL)
+    deduceOpenCLAddressSpace(VarSpec);
+
   return VarSpec;
 }
 
diff --git a/clang/test/SemaOpenCLCXX/address-space-deduction.cl b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
index 6a81a8b2d7c76..ddfdb6da4347c 100644
--- a/clang/test/SemaOpenCLCXX/address-space-deduction.cl
+++ b/clang/test/SemaOpenCLCXX/address-space-deduction.cl
@@ -5,6 +5,11 @@
 //CHECK: |-VarDecl {{.*}} foo 'const __global int'
 constexpr int foo = 0;
 
+//CHECK: |-VarDecl {{.*}} foo1 'T' cinit
+//CHECK: `-VarTemplateSpecializationDecl {{.*}} used foo1 '__global long':'__global long' cinit
+template 
+T foo1 = 0;
+
 class c {
 public:
   //CHECK: `-VarDecl {{.*}} foo2 'const __global int'
@@ -30,7 +35,7 @@ struct c2 {
 
 template 
 struct x1 {
-//CHECK: -CXXMethodDecl {{.*}} operator= 'x1 &(const x1 &__private){{( __attribute__.*)?}} __generic'
+//CHECK: -CXXMethodDecl {{.*}} operator= 'x1 &(const x1 &){{( __attribute__.*)?}} __generic'
 //CHECK: -CXXMethodDecl {{.*}} operator= '__generic x1 &(const __generic x1 &__private){{( __attribute__.*)?}} __generic'
   x1& operator=(const x1& xx) {
     y = xx.y;
@@ -41,7 +46,7 @@ struct x1 {
 
 template 
 struct x2 {
-//CHECK: -CXXMethodDecl {{.*}} foo 'void (x1 *__private){{( __attribute__.*)?}} __generic'
+//CHECK: -CXXMethodDecl {{.*}} foo 'void (x1 *){{( __attribute__.*)?}} __generic'
 //CHECK: -CXXMethodDecl {{.*}} foo 'void (__generic x1 *__private){{( __attribute__.*)?}} __generic'
   void foo(x1* xx) {
     m[0] = *xx;
@@ -57,10 +62,10 @@ void bar(__global x1 *xx, __global x2 *bar) {
 template 
 class x3 : public T {
 public:
-  //CHECK: -CXXConstructorDecl {{.*}} x3 'void (const x3 &__private){{( __attribute__.*)?}} __generic'
+  //CHECK: -CXXConstructorDecl {{.*}} x3 'void (const x3 &){{( __attribute__.*)?}} __generic'
   x3(const x3 &t);
 };
-//CHECK: -CXXConstructorDecl {{.*}} x3 'void (const x3 &__private){{( __attribute__.*)?}} __generic'
+//CHECK: -CXXConstructorDecl {{.*}} x3 'void (const x3 &){{( __attribute__.*)?}} __generic'
 template 
 x3::x3(const x3 &t) {}
 
@@ -68,7 +73,8 @@ template 
 T xxx(T *in1, T in2) {
   // This pointer can't be deduced to generic because addr space
   // will be taken from the template argument.
-  //CHECK: `-VarDecl {{.*}} '__private T *__private' cinit
+  //CHECK: `-VarDecl {{.*}} 'T *' cinit
+  //CHECK: `-VarDecl {{.*}} i '__private int *__private' cinit
   T *i = in1;
   T ii;
   __private T *ptr = ⅈ
@@ -111,4 +117,5 @@ __kernel void k() {
   t3(&x);
   t4(&p);
   t5(&p);
+  long f1 = foo1;
 }

From af16a45683cccc78925e71ac5d58d6cab8447840 Mon Sep 17 00:00:00 2001
From: Georgii Rymar 
Date: Thu, 9 Jul 2020 13:08:13 +0300
Subject: [PATCH 080/771] [LLD][ELF] -  Allow relocation sections to appear
 before their target sections.

This allows handling cases where SHT_REL[A] sections appear before
their target sections in object files.

This fixes https://bugs.llvm.org/show_bug.cgi?id=46632

which says: "Normally it is not what compilers would emit. We have to support it,
because some custom tools might want to use this feature, which is not restricted by ELF gABI"

Differential revision: https://reviews.llvm.org/D83469
---
 lld/ELF/InputFiles.cpp                        | 14 ++++++-
 .../ELF/invalid/reloc-section-reordered.test  | 33 ----------------
 lld/test/ELF/reloc-sec-before-relocated.test  | 38 +++++++++++++++++++
 3 files changed, 51 insertions(+), 34 deletions(-)
 delete mode 100644 lld/test/ELF/invalid/reloc-section-reordered.test
 create mode 100644 lld/test/ELF/reloc-sec-before-relocated.test

diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp
index e59bf626be501..c2f1830a981b8 100644
--- a/lld/ELF/InputFiles.cpp
+++ b/lld/ELF/InputFiles.cpp
@@ -632,6 +632,8 @@ void ObjFile::initializeSections(bool ignoreComdats) {
       break;
     case SHT_SYMTAB:
     case SHT_STRTAB:
+    case SHT_REL:
+    case SHT_RELA:
     case SHT_NULL:
       break;
     default:
@@ -639,11 +641,21 @@ void ObjFile::initializeSections(bool ignoreComdats) {
     }
   }
 
-  // This block handles SHF_LINK_ORDER.
+  // We have a second loop. It is used to:
+  // 1) handle SHF_LINK_ORDER sections.
+  // 2) create SHT_REL[A] sections. In some cases the section header index of a
+  //    relocation section may be smaller than that of the relocated section. In
+  //    such cases, the relocation section would attempt to reference a target
+  //    section that has not yet been created. For simplicity, delay creation of
+  //    relocation sections until now.
   for (size_t i = 0, e = objSections.size(); i < e; ++i) {
     if (this->sections[i] == &InputSection::discarded)
       continue;
     const Elf_Shdr &sec = objSections[i];
+
+    if (sec.sh_type == SHT_REL || sec.sh_type == SHT_RELA)
+      this->sections[i] = createInputSection(sec);
+
     if (!(sec.sh_flags & SHF_LINK_ORDER))
       continue;
 
diff --git a/lld/test/ELF/invalid/reloc-section-reordered.test b/lld/test/ELF/invalid/reloc-section-reordered.test
deleted file mode 100644
index 91f25f61b7e6f..0000000000000
--- a/lld/test/ELF/invalid/reloc-section-reordered.test
+++ /dev/null
@@ -1,33 +0,0 @@
-# REQUIRES: x86
-
-# RUN: yaml2obj %s -o %t.o
-# RUN: not ld.lld %t.o -o /dev/null 2>&1 | FileCheck %s
-# CHECK: unsupported relocation reference
-
-## YAML below lists .rela.text before .text, we do not support it.
-
-!ELF
-FileHeader:
-  Class:           ELFCLASS64
-  Data:            ELFDATA2LSB
-  OSABI:           ELFOSABI_FREEBSD
-  Type:            ET_REL
-  Machine:         EM_X86_64
-Sections:
-  - Type:            SHT_REL
-    Name:            .rela.text
-    Link:            .symtab
-    Info:            .text
-    AddressAlign:    0x04
-    Relocations:
-      - Symbol:          .text
-        Type:            R_X86_64_NONE
-  - Type:            SHT_PROGBITS
-    Name:            .text
-    Flags:           [ SHF_ALLOC, SHF_EXECINSTR ]
-    AddressAlign:    0x04
-    Content:         "FFFFFFFFFFFFFFFF"
-Symbols:
-  - Name:    .text
-    Type:    STT_SECTION
-    Section: .text
diff --git a/lld/test/ELF/reloc-sec-before-relocated.test b/lld/test/ELF/reloc-sec-before-relocated.test
new file mode 100644
index 0000000000000..a56231294a0ca
--- /dev/null
+++ b/lld/test/ELF/reloc-sec-before-relocated.test
@@ -0,0 +1,38 @@
+## If the section header index of a SHT_REL[A] section is smaller than the
+## section header index of the relocated section, we should handle it properly.
+## Normally it is not what compilers would emit, but some custom tools might
+## want to use this feature, which is not restricted by ELF gABI.
+## GNU ld supports this as well.
+
+# RUN: yaml2obj %s -DTYPE=SHT_RELA -o %t1.o
+# RUN: ld.lld -shared %t1.o -o %t1
+# RUN: llvm-readelf --relocs %t1 | FileCheck %s
+
+# RUN: yaml2obj %s -DTYPE=SHT_REL -o %t2.o
+# RUN: ld.lld -shared %t2.o -o %t2
+# RUN: llvm-readelf --relocs %t2 | FileCheck %s
+
+## Check we handle the relocation properly.
+# CHECK:      Relocation section '.rela.dyn' at offset 0x238 contains 1 entries:
+# CHECK-NEXT:     Offset             Info             Type    Symbol's Value  Symbol's Name + Addend
+# CHECK-NEXT: 00000000000022f0  0000000100000001 R_X86_64_64 0000000000000000 foo + 0
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_REL
+  Machine: EM_X86_64
+Sections:
+  - Name:  .relx.data
+    Type:  [[TYPE]]
+    Info:  .data
+    Relocations:
+      - Symbol: foo
+        Type:   R_X86_64_64
+  - Name:  .data
+    Type:  SHT_PROGBITS
+    Flags: [ SHF_ALLOC, SHF_WRITE ]
+Symbols:
+  - Name:    foo
+    Binding: STB_GLOBAL

From 6bda276f93023ae91937cb8a1f45bf27e5a3ced7 Mon Sep 17 00:00:00 2001
From: Ben Dunbobbin 
Date: Mon, 13 Jul 2020 11:58:30 +0100
Subject: [PATCH 081/771] [LLD][ELF][Windows] small improvement to D82567

Bail early if there is no existing output file to be overwritten.

Differential Revision: https://reviews.llvm.org/D83272
---
 lld/Common/Filesystem.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lld/Common/Filesystem.cpp b/lld/Common/Filesystem.cpp
index 206b892f0a69c..671b352a3f6bc 100644
--- a/lld/Common/Filesystem.cpp
+++ b/lld/Common/Filesystem.cpp
@@ -40,6 +40,9 @@ using namespace lld;
 // This function spawns a background thread to remove the file.
 // The calling thread returns almost immediately.
 void lld::unlinkAsync(StringRef path) {
+  if (!sys::fs::exists(path) || !sys::fs::is_regular_file(path))
+    return;
+
 // Removing a file is async on windows.
 #if defined(_WIN32)
   // On Windows co-operative programs can be expected to open LLD's
@@ -71,8 +74,7 @@ void lld::unlinkAsync(StringRef path) {
   }
   sys::fs::remove(path);
 #else
-  if (parallel::strategy.ThreadsRequested == 1 || !sys::fs::exists(path) ||
-      !sys::fs::is_regular_file(path))
+  if (parallel::strategy.ThreadsRequested == 1)
     return;
 
   // We cannot just remove path from a different thread because we are now going

From c051312eb24dedc119a917ea23e6a5810f5758ff Mon Sep 17 00:00:00 2001
From: Andre Vieira 
Date: Mon, 13 Jul 2020 11:52:58 +0100
Subject: [PATCH 082/771] [libc][benchmark] Add display option to render.py3

Differential Revision: https://reviews.llvm.org/D83380
---
 libc/benchmarks/render.py3 | 29 ++++++++++++++++++++++++-----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/libc/benchmarks/render.py3 b/libc/benchmarks/render.py3
index e790d18f84e7f..f8c321ff17af6 100644
--- a/libc/benchmarks/render.py3
+++ b/libc/benchmarks/render.py3
@@ -112,7 +112,7 @@ def get_configuration(jsons):
     return config
 
 
-def setup_graphs(files):
+def setup_graphs(files, display):
     """Setups the graphs to render from the json files."""
     jsons = []
     for file in files:
@@ -122,6 +122,7 @@ def setup_graphs(files):
         sys.exit("Nothing to process")
 
     for root in jsons:
+        frequency = root["Host"]["CpuFrequency"]
         for function in root["Functions"]:
             function_name = function["Name"]
             sizes = function["Sizes"]
@@ -129,7 +130,13 @@ def setup_graphs(files):
             assert len(sizes) == len(runtimes)
             values = collections.defaultdict(lambda: [])
             for i in range(len(sizes)):
-              values[sizes[i]].append(runtimes[i])
+              value = runtimes[i]
+              if display == "cycles":
+                  value = value * frequency
+              if display == "bytespercycle":
+                  value = value * frequency
+                  value = sizes[i] / value
+              values[sizes[i]].append(value)
             add_plot(function_name, values)
 
     config = get_configuration(jsons)
@@ -148,9 +155,15 @@ def setup_graphs(files):
     axes.set_title(get_title(get_host(jsons)))
     axes.set_ylim(bottom=0)
     axes.set_xlabel("Size")
-    axes.set_ylabel("Time")
     axes.xaxis.set_major_formatter(EngFormatter(unit="B"))
-    axes.yaxis.set_major_formatter(EngFormatter(unit="s"))
+    if display == "cycles":
+          axes.set_ylabel("Cycles")
+    if display == "time":
+          axes.set_ylabel("Time")
+          axes.yaxis.set_major_formatter(EngFormatter(unit="s"))
+    if display == "bytespercycle":
+          axes.set_ylabel("bytes/cycle")
+
     plt.legend()
     plt.grid()
 
@@ -164,8 +177,14 @@ def main():
         "--headless",
         help="If set do not display the graph.",
         action="store_true")
+    parser.add_argument(
+        "--display",
+        choices= ["time", "cycles", "bytespercycle"],
+        default="time",
+        help="Use to display either 'time', 'cycles' or 'bytes/cycle'.")
+
     args = parser.parse_args()
-    setup_graphs(args.files)
+    setup_graphs(args.files, args.display)
     if args.output:
         plt.savefig(args.output)
     if not args.headless:

From 60cbbb306d29f882e18d6293177d694c11c67e84 Mon Sep 17 00:00:00 2001
From: Raphael Isemann 
Date: Mon, 13 Jul 2020 12:22:09 +0200
Subject: [PATCH 083/771] [lldb][NFC] Remove misleading class_language variable
 in DWARFASTParserClang

There is a local 'class_language' variable in DWARFASTParserClang which is named
as if it is related to the 'class_language' member of ParsedDWARFTypeAttributes.
However, it actually only has two possible enum values: 'ObjC' (which means the
current record is a Objective-C class) or 'Unknown' (which covers all other
cases).

This is confusing for the reader and also led to some strange code where we
have several comparisons against the value "ObjC_plus_plus" (which is always
false).

This replaces the variable with either a const bool variable (if there are
multiple checks for that condition in a function) or a direct call to the
TypeSystemClang utility method for checking if it's a Objective-C
Object/Interface type.
---
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  | 32 +++++++++----------
 .../SymbolFile/DWARF/DWARFASTParserClang.h    |  2 --
 2 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 7de88274ccf6e..929001671af7d 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -1958,9 +1958,9 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die,
   ClangASTImporter::LayoutInfo layout_info;
 
   if (die.HasChildren()) {
-    LanguageType class_language = eLanguageTypeUnknown;
-    if (TypeSystemClang::IsObjCObjectOrInterfaceType(clang_type)) {
-      class_language = eLanguageTypeObjC;
+    const bool type_is_objc_object_or_interface =
+        TypeSystemClang::IsObjCObjectOrInterfaceType(clang_type);
+    if (type_is_objc_object_or_interface) {
       // For objective C we don't start the definition when the class is
       // created.
       TypeSystemClang::StartTagDeclarationDefinition(clang_type);
@@ -1986,16 +1986,15 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die,
     std::vector member_function_dies;
 
     DelayedPropertyList delayed_properties;
-    ParseChildMembers(die, clang_type, class_language, bases,
-                      member_accessibilities, member_function_dies,
-                      delayed_properties, default_accessibility, is_a_class,
-                      layout_info);
+    ParseChildMembers(die, clang_type, bases, member_accessibilities,
+                      member_function_dies, delayed_properties,
+                      default_accessibility, is_a_class, layout_info);
 
     // Now parse any methods if there were any...
     for (const DWARFDIE &die : member_function_dies)
       dwarf->ResolveType(die);
 
-    if (class_language == eLanguageTypeObjC) {
+    if (type_is_objc_object_or_interface) {
       ConstString class_name(clang_type.GetTypeName());
       if (class_name) {
         dwarf->GetObjCMethods(class_name, [&](DWARFDIE method_die) {
@@ -2012,7 +2011,7 @@ bool DWARFASTParserClang::CompleteRecordType(const DWARFDIE &die,
 
     // If we have a DW_TAG_structure_type instead of a DW_TAG_class_type we
     // need to tell the clang type it is actually a class.
-    if (class_language != eLanguageTypeObjC) {
+    if (!type_is_objc_object_or_interface) {
       if (is_a_class && tag_decl_kind != clang::TTK_Class)
         m_ast.SetTagTypeKind(ClangUtil::GetQualType(clang_type),
                              clang::TTK_Class);
@@ -2346,7 +2345,6 @@ Function *DWARFASTParserClang::ParseFunctionFromDWARF(CompileUnit &comp_unit,
 void DWARFASTParserClang::ParseSingleMember(
     const DWARFDIE &die, const DWARFDIE &parent_die,
     const lldb_private::CompilerType &class_clang_type,
-    const lldb::LanguageType class_language,
     std::vector &member_accessibilities,
     lldb::AccessType default_accessibility,
     DelayedPropertyList &delayed_properties,
@@ -2520,9 +2518,11 @@ void DWARFASTParserClang::ParseSingleMember(
       bit_offset = 0;
     }
 
+    const bool class_is_objc_object_or_interface =
+        TypeSystemClang::IsObjCObjectOrInterfaceType(class_clang_type);
+
     // FIXME: Make Clang ignore Objective-C accessibility for expressions
-    if (class_language == eLanguageTypeObjC ||
-        class_language == eLanguageTypeObjC_plus_plus)
+    if (class_is_objc_object_or_interface)
       accessibility = eAccessNone;
 
     // Handle static members
@@ -2599,8 +2599,7 @@ void DWARFASTParserClang::ParseSingleMember(
             // unnamed bitfields if we have a new enough clang.
             bool detect_unnamed_bitfields = true;
 
-            if (class_language == eLanguageTypeObjC ||
-                class_language == eLanguageTypeObjC_plus_plus)
+            if (class_is_objc_object_or_interface)
               detect_unnamed_bitfields =
                   die.GetCU()->Supports_unnamed_objc_bitfields();
 
@@ -2754,7 +2753,6 @@ void DWARFASTParserClang::ParseSingleMember(
 
 bool DWARFASTParserClang::ParseChildMembers(
     const DWARFDIE &parent_die, CompilerType &class_clang_type,
-    const LanguageType class_language,
     std::vector> &base_classes,
     std::vector &member_accessibilities,
     std::vector &member_function_dies,
@@ -2778,7 +2776,7 @@ bool DWARFASTParserClang::ParseChildMembers(
     switch (tag) {
     case DW_TAG_member:
     case DW_TAG_APPLE_property:
-      ParseSingleMember(die, parent_die, class_clang_type, class_language,
+      ParseSingleMember(die, parent_die, class_clang_type,
                         member_accessibilities, default_accessibility,
                         delayed_properties, layout_info, last_field_info);
       break;
@@ -2868,7 +2866,7 @@ bool DWARFASTParserClang::ParseChildMembers(
         CompilerType base_class_clang_type =
             base_class_type->GetFullCompilerType();
         assert(base_class_clang_type);
-        if (class_language == eLanguageTypeObjC) {
+        if (TypeSystemClang::IsObjCObjectOrInterfaceType(class_clang_type)) {
           ast->SetObjCSuperClass(class_clang_type, base_class_clang_type);
         } else {
           std::unique_ptr result =
diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
index cb718a207d2d4..2ef49abc1da16 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.h
@@ -110,7 +110,6 @@ class DWARFASTParserClang : public DWARFASTParser {
 
   bool ParseChildMembers(
       const DWARFDIE &die, lldb_private::CompilerType &class_compiler_type,
-      const lldb::LanguageType class_language,
       std::vector> &base_classes,
       std::vector &member_accessibilities,
       std::vector &member_function_dies,
@@ -195,7 +194,6 @@ class DWARFASTParserClang : public DWARFASTParser {
   void
   ParseSingleMember(const DWARFDIE &die, const DWARFDIE &parent_die,
                     const lldb_private::CompilerType &class_clang_type,
-                    const lldb::LanguageType class_language,
                     std::vector &member_accessibilities,
                     lldb::AccessType default_accessibility,
                     DelayedPropertyList &delayed_properties,

From 319a97b5e2620f9eb3618b629223253feacff92a Mon Sep 17 00:00:00 2001
From: Paul Walker 
Date: Mon, 13 Jul 2020 11:16:30 +0000
Subject: [PATCH 084/771] [SVE] Ensure fixed length vector fptrunc operations
 bigger than NEON are not considered legal.

Differential Revision: https://reviews.llvm.org/D83568
---
 .../Target/AArch64/AArch64ISelLowering.cpp    |  15 +-
 .../AArch64/sve-fixed-length-fp-converts.ll   | 168 ++++++++++++++++++
 2 files changed, 180 insertions(+), 3 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/sve-fixed-length-fp-converts.ll

diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 65ccc18ed6013..85db14ab66feb 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -963,12 +963,15 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
           addTypeForFixedLengthSVE(VT);
 
       // 64bit results can mean a bigger than NEON input.
-      for (auto VT : {MVT::v8i8, MVT::v4i16, MVT::v2i32})
+      for (auto VT : {MVT::v8i8, MVT::v4i16})
         setOperationAction(ISD::TRUNCATE, VT, Custom);
+      setOperationAction(ISD::FP_ROUND, MVT::v4f16, Custom);
 
       // 128bit results imply a bigger than NEON input.
       for (auto VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
         setOperationAction(ISD::TRUNCATE, VT, Custom);
+      for (auto VT : {MVT::v8f16, MVT::v4f32})
+        setOperationAction(ISD::FP_ROUND, VT, Expand);
     }
   }
 
@@ -2712,13 +2715,19 @@ SDValue AArch64TargetLowering::LowerFP_ROUND(SDValue Op,
                                              SelectionDAG &DAG) const {
   bool IsStrict = Op->isStrictFPOpcode();
   SDValue SrcVal = Op.getOperand(IsStrict ? 1 : 0);
-  if (SrcVal.getValueType() != MVT::f128) {
+  EVT SrcVT = SrcVal.getValueType();
+
+  if (SrcVT != MVT::f128) {
+    // Expand cases where the input is a vector bigger than NEON.
+    if (useSVEForFixedLengthVectorVT(SrcVT))
+      return SDValue();
+
     // It's legal except when f128 is involved
     return Op;
   }
 
   RTLIB::Libcall LC;
-  LC = RTLIB::getFPROUND(SrcVal.getValueType(), Op.getValueType());
+  LC = RTLIB::getFPROUND(SrcVT, Op.getValueType());
 
   // FP_ROUND node has a second operand indicating whether it is known to be
   // precise. That doesn't take part in the LibCall so we can't directly use
diff --git a/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-converts.ll b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-converts.ll
new file mode 100644
index 0000000000000..4ffb56abe5f18
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve-fixed-length-fp-converts.ll
@@ -0,0 +1,168 @@
+; RUN: llc -aarch64-sve-vector-bits-min=128  -asm-verbose=0 < %s | FileCheck %s -check-prefix=NO_SVE
+; RUN: llc -aarch64-sve-vector-bits-min=256  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=384  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK
+; RUN: llc -aarch64-sve-vector-bits-min=512  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=640  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=768  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=896  -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512
+; RUN: llc -aarch64-sve-vector-bits-min=1024 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1152 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1280 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1408 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1536 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1664 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1792 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=1920 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024
+; RUN: llc -aarch64-sve-vector-bits-min=2048 -asm-verbose=0 < %s | FileCheck %s -check-prefixes=CHECK,VBITS_GE_512,VBITS_GE_1024,VBITS_GE_2048
+
+target triple = "aarch64-unknown-linux-gnu"
+
+; Don't use SVE when its registers are no bigger than NEON.
+; NO_SVE-NOT: z{0-9}
+
+; NOTE: fptrunc operations bigger than NEON are expanded. These tests just
+; ensure we've correctly set the operation action for fixed length vector types
+; that require SVE. They'll be updated to protect their expected code generation
+; when lowering is implemented.
+
+;
+; fptrunc f32 -> f16
+;
+
+define <8 x half> @fptrunc_v8f32_v8f16(<8 x float>* %in) #0 {
+; CHECK-LABEL: fptrunc_v8f32_v8f16:
+; CHECK-COUNT-8: fcvt h{{[0-9]}}, s{{[0-9]}}
+; CHECK-NOT: fcvt
+; CHECK: ret
+  %a = load <8 x float>, <8 x float>* %in
+  %b = fptrunc <8 x float> %a to <8 x half>
+  ret <8 x half> %b
+}
+
+define void @fptrunc_v16f32_v16f16(<16 x float>* %in, <16 x half>* %out) #0 {
+; CHECK-LABEL: fptrunc_v16f32_v16f16:
+; CHECK-COUNT-16: fcvt h{{[0-9]}}, s{{[0-9]}}
+; CHECK-NOT: fcvt
+; CHECK: ret
+  %a = load <16 x float>, <16 x float>* %in
+  %b = fptrunc <16 x float> %a to <16 x half>
+  store <16 x half> %b, <16 x half>* %out
+  ret void
+}
+
+define void @fptrunc_v32f32_v32f16(<32 x float>* %in, <32 x half>* %out) #0 {
+; CHECK-LABEL: fptrunc_v32f32_v32f16:
+; CHECK-COUNT-32: fcvt h{{[0-9]}}, s{{[0-9]}}
+; CHECK-NOT: fcvt
+; CHECK: ret
+  %a = load <32 x float>, <32 x float>* %in
+  %b = fptrunc <32 x float> %a to <32 x half>
+  store <32 x half> %b, <32 x half>* %out
+  ret void
+}
+
+define void @fptrunc_v64f32_v64f16(<64 x float>* %in, <64 x half>* %out) #0 {
+; CHECK-LABEL: fptrunc_v64f32_v64f16:
+; CHECK-COUNT-64: fcvt h{{[0-9]}}, s{{[0-9]}}
+; CHECK-NOT: fcvt
+; CHECK: ret
+  %a = load <64 x float>, <64 x float>* %in
+  %b = fptrunc <64 x float> %a to <64 x half>
+  store <64 x half> %b, <64 x half>* %out
+  ret void
+}
+
+;
+; fptrunc f64 -> f16
+;
+
+define <4 x half> @fptrunc_v4f64_v4f16(<4 x double>* %in) #0 {
+; CHECK-LABEL: fptrunc_v4f64_v4f16:
+; CHECK-COUNT-4: fcvt h{{[0-9]}}, d{{[0-9]}}
+; CHECK-NOT: fcvt
+; CHECK: ret
+  %a = load <4 x double>, <4 x double>* %in
+  %b = fptrunc <4 x double> %a to <4 x half>
+  ret <4 x half> %b
+}
+
+define <8 x half> @fptrunc_v8f64_v8f16(<8 x double>* %in) #0 {
+; CHECK-LABEL: fptrunc_v8f64_v8f16:
+; CHECK-COUNT-8: fcvt h{{[0-9]}}, d{{[0-9]}}
+; CHECK-NOT: fcvt
+; CHECK: ret
+  %a = load <8 x double>, <8 x double>* %in
+  %b = fptrunc <8 x double> %a to <8 x half>
+  ret <8 x half> %b
+}
+
+define void @fptrunc_v16f64_v16f16(<16 x double>* %in, <16 x half>* %out) #0 {
+; CHECK-LABEL: fptrunc_v16f64_v16f16:
+; CHECK-COUNT-16: fcvt h{{[0-9]}}, d{{[0-9]}}
+; CHECK-NOT: fcvt
+; CHECK: ret
+  %a = load <16 x double>, <16 x double>* %in
+  %b = fptrunc <16 x double> %a to <16 x half>
+  store <16 x half> %b, <16 x half>* %out
+  ret void
+}
+
+define void @fptrunc_v32f64_v32f16(<32 x double>* %in, <32 x half>* %out) #0 {
+; CHECK-LABEL: fptrunc_v32f64_v32f16:
+; CHECK-COUNT-32: fcvt h{{[0-9]}}, d{{[0-9]}}
+; CHECK-NOT: fcvt
+; CHECK: ret
+  %a = load <32 x double>, <32 x double>* %in
+  %b = fptrunc <32 x double> %a to <32 x half>
+  store <32 x half> %b, <32 x half>* %out
+  ret void
+}
+
+;
+; fptrunc f64 -> f32
+;
+
+define <4 x float> @fptrunc_v4f64_v4f32(<4 x double>* %in) #0 {
+; CHECK-LABEL: fptrunc_v4f64_v4f32:
+; CHECK-COUNT-4: fcvt s{{[0-9]}}, d{{[0-9]}}
+; CHECK-NOT: fcvt
+; CHECK: ret
+  %a = load <4 x double>, <4 x double>* %in
+  %b = fptrunc <4 x double> %a to <4 x float>
+  ret <4 x float> %b
+}
+
+define void @fptrunc_v8f64_v8f32(<8 x double>* %in, <8 x float>* %out) #0 {
+; CHECK-LABEL: fptrunc_v8f64_v8f32:
+; CHECK-COUNT-8: fcvt s{{[0-9]}}, d{{[0-9]}}
+; CHECK-NOT: fcvt
+; CHECK: ret
+  %a = load <8 x double>, <8 x double>* %in
+  %b = fptrunc <8 x double> %a to <8 x float>
+  store <8 x float> %b, <8 x float>* %out
+  ret void
+}
+
+define void @fptrunc_v16f64_v16f32(<16 x double>* %in, <16 x float>* %out) #0 {
+; CHECK-LABEL: fptrunc_v16f64_v16f32:
+; CHECK-COUNT-16: fcvt s{{[0-9]}}, d{{[0-9]}}
+; CHECK-NOT: fcvt
+; CHECK: ret
+  %a = load <16 x double>, <16 x double>* %in
+  %b = fptrunc <16 x double> %a to <16 x float>
+  store <16 x float> %b, <16 x float>* %out
+  ret void
+}
+
+define void @fptrunc_v32f64_v32f32(<32 x double>* %in, <32 x float>* %out) #0 {
+; CHECK-LABEL: fptrunc_v32f64_v32f32:
+; CHECK-COUNT-32: fcvt s{{[0-9]}}, d{{[0-9]}}
+; CHECK-NOT: fcvt
+; CHECK: ret
+  %a = load <32 x double>, <32 x double>* %in
+  %b = fptrunc <32 x double> %a to <32 x float>
+  store <32 x float> %b, <32 x float>* %out
+  ret void
+}
+
+attributes #0 = { nounwind "target-features"="+sve" }

From aa933d82f867ab4d33eafc5ee2666dbbc61d293d Mon Sep 17 00:00:00 2001
From: Raphael Isemann 
Date: Mon, 13 Jul 2020 13:14:55 +0200
Subject: [PATCH 085/771] [lldb][NFC] Early-exit in
 DWARFASTParserClang::ParseSingleMember

This patch just early-exits after the 'if (num_attributes > 0)' check.
---
 .../SymbolFile/DWARF/DWARFASTParserClang.cpp  | 661 +++++++++---------
 1 file changed, 331 insertions(+), 330 deletions(-)

diff --git a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
index 929001671af7d..2d1db66e7fd91 100644
--- a/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
+++ b/lldb/source/Plugins/SymbolFile/DWARF/DWARFASTParserClang.cpp
@@ -2360,393 +2360,394 @@ void DWARFASTParserClang::ParseSingleMember(
 
   DWARFAttributes attributes;
   const size_t num_attributes = die.GetAttributes(attributes);
-  if (num_attributes > 0) {
-    const char *name = nullptr;
-    const char *prop_name = nullptr;
-    const char *prop_getter_name = nullptr;
-    const char *prop_setter_name = nullptr;
-    uint32_t prop_attributes = 0;
-
-    bool is_artificial = false;
-    DWARFFormValue encoding_form;
-    AccessType accessibility = eAccessNone;
-    uint32_t member_byte_offset =
-        (parent_die.Tag() == DW_TAG_union_type) ? 0 : UINT32_MAX;
-    llvm::Optional byte_size;
-    int64_t bit_offset = 0;
-    uint64_t data_bit_offset = UINT64_MAX;
-    size_t bit_size = 0;
-    bool is_external =
-        false; // On DW_TAG_members, this means the member is static
-    uint32_t i;
-    for (i = 0; i < num_attributes && !is_artificial; ++i) {
-      const dw_attr_t attr = attributes.AttributeAtIndex(i);
-      DWARFFormValue form_value;
-      if (attributes.ExtractFormValueAtIndex(i, form_value)) {
-        // DW_AT_data_member_location indicates the byte offset of the
-        // word from the base address of the structure.
-        //
-        // DW_AT_bit_offset indicates how many bits into the word
-        // (according to the host endianness) the low-order bit of the
-        // field starts.  AT_bit_offset can be negative.
-        //
-        // DW_AT_bit_size indicates the size of the field in bits.
-        switch (attr) {
-        case DW_AT_name:
-          name = form_value.AsCString();
-          break;
-        case DW_AT_type:
-          encoding_form = form_value;
-          break;
-        case DW_AT_bit_offset:
-          bit_offset = form_value.Signed();
-          break;
-        case DW_AT_bit_size:
-          bit_size = form_value.Unsigned();
-          break;
-        case DW_AT_byte_size:
-          byte_size = form_value.Unsigned();
-          break;
-        case DW_AT_data_bit_offset:
-          data_bit_offset = form_value.Unsigned();
-          break;
-        case DW_AT_data_member_location:
-          if (form_value.BlockData()) {
-            Value initialValue(0);
-            Value memberOffset(0);
-            const DWARFDataExtractor &debug_info_data = die.GetData();
-            uint32_t block_length = form_value.Unsigned();
-            uint32_t block_offset =
-                form_value.BlockData() - debug_info_data.GetDataStart();
-            if (DWARFExpression::Evaluate(
-                    nullptr, // ExecutionContext *
-                    nullptr, // RegisterContext *
-                    module_sp,
-                    DataExtractor(debug_info_data, block_offset, block_length),
-                    die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr,
-                    memberOffset, nullptr)) {
-              member_byte_offset = memberOffset.ResolveValue(nullptr).UInt();
-            }
-          } else {
-            // With DWARF 3 and later, if the value is an integer constant,
-            // this form value is the offset in bytes from the beginning of
-            // the containing entity.
-            member_byte_offset = form_value.Unsigned();
+  if (num_attributes == 0)
+    return;
+
+  const char *name = nullptr;
+  const char *prop_name = nullptr;
+  const char *prop_getter_name = nullptr;
+  const char *prop_setter_name = nullptr;
+  uint32_t prop_attributes = 0;
+
+  bool is_artificial = false;
+  DWARFFormValue encoding_form;
+  AccessType accessibility = eAccessNone;
+  uint32_t member_byte_offset =
+      (parent_die.Tag() == DW_TAG_union_type) ? 0 : UINT32_MAX;
+  llvm::Optional byte_size;
+  int64_t bit_offset = 0;
+  uint64_t data_bit_offset = UINT64_MAX;
+  size_t bit_size = 0;
+  bool is_external =
+      false; // On DW_TAG_members, this means the member is static
+  uint32_t i;
+  for (i = 0; i < num_attributes && !is_artificial; ++i) {
+    const dw_attr_t attr = attributes.AttributeAtIndex(i);
+    DWARFFormValue form_value;
+    if (attributes.ExtractFormValueAtIndex(i, form_value)) {
+      // DW_AT_data_member_location indicates the byte offset of the
+      // word from the base address of the structure.
+      //
+      // DW_AT_bit_offset indicates how many bits into the word
+      // (according to the host endianness) the low-order bit of the
+      // field starts.  AT_bit_offset can be negative.
+      //
+      // DW_AT_bit_size indicates the size of the field in bits.
+      switch (attr) {
+      case DW_AT_name:
+        name = form_value.AsCString();
+        break;
+      case DW_AT_type:
+        encoding_form = form_value;
+        break;
+      case DW_AT_bit_offset:
+        bit_offset = form_value.Signed();
+        break;
+      case DW_AT_bit_size:
+        bit_size = form_value.Unsigned();
+        break;
+      case DW_AT_byte_size:
+        byte_size = form_value.Unsigned();
+        break;
+      case DW_AT_data_bit_offset:
+        data_bit_offset = form_value.Unsigned();
+        break;
+      case DW_AT_data_member_location:
+        if (form_value.BlockData()) {
+          Value initialValue(0);
+          Value memberOffset(0);
+          const DWARFDataExtractor &debug_info_data = die.GetData();
+          uint32_t block_length = form_value.Unsigned();
+          uint32_t block_offset =
+              form_value.BlockData() - debug_info_data.GetDataStart();
+          if (DWARFExpression::Evaluate(
+                  nullptr, // ExecutionContext *
+                  nullptr, // RegisterContext *
+                  module_sp,
+                  DataExtractor(debug_info_data, block_offset, block_length),
+                  die.GetCU(), eRegisterKindDWARF, &initialValue, nullptr,
+                  memberOffset, nullptr)) {
+            member_byte_offset = memberOffset.ResolveValue(nullptr).UInt();
           }
-          break;
+        } else {
+          // With DWARF 3 and later, if the value is an integer constant,
+          // this form value is the offset in bytes from the beginning of
+          // the containing entity.
+          member_byte_offset = form_value.Unsigned();
+        }
+        break;
 
-        case DW_AT_accessibility:
-          accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned());
-          break;
-        case DW_AT_artificial:
-          is_artificial = form_value.Boolean();
-          break;
-        case DW_AT_APPLE_property_name:
-          prop_name = form_value.AsCString();
-          break;
-        case DW_AT_APPLE_property_getter:
-          prop_getter_name = form_value.AsCString();
-          break;
-        case DW_AT_APPLE_property_setter:
-          prop_setter_name = form_value.AsCString();
-          break;
-        case DW_AT_APPLE_property_attribute:
-          prop_attributes = form_value.Unsigned();
-          break;
-        case DW_AT_external:
-          is_external = form_value.Boolean();
-          break;
+      case DW_AT_accessibility:
+        accessibility = DW_ACCESS_to_AccessType(form_value.Unsigned());
+        break;
+      case DW_AT_artificial:
+        is_artificial = form_value.Boolean();
+        break;
+      case DW_AT_APPLE_property_name:
+        prop_name = form_value.AsCString();
+        break;
+      case DW_AT_APPLE_property_getter:
+        prop_getter_name = form_value.AsCString();
+        break;
+      case DW_AT_APPLE_property_setter:
+        prop_setter_name = form_value.AsCString();
+        break;
+      case DW_AT_APPLE_property_attribute:
+        prop_attributes = form_value.Unsigned();
+        break;
+      case DW_AT_external:
+        is_external = form_value.Boolean();
+        break;
 
-        default:
-        case DW_AT_declaration:
-        case DW_AT_description:
-        case DW_AT_mutable:
-        case DW_AT_visibility:
-        case DW_AT_sibling:
-          break;
-        }
+      default:
+      case DW_AT_declaration:
+      case DW_AT_description:
+      case DW_AT_mutable:
+      case DW_AT_visibility:
+      case DW_AT_sibling:
+        break;
       }
     }
+  }
 
-    if (prop_name) {
-      ConstString fixed_setter;
+  if (prop_name) {
+    ConstString fixed_setter;
 
-      // Check if the property getter/setter were provided as full names.
-      // We want basenames, so we extract them.
+    // Check if the property getter/setter were provided as full names.
+    // We want basenames, so we extract them.
 
-      if (prop_getter_name && prop_getter_name[0] == '-') {
-        ObjCLanguage::MethodName prop_getter_method(prop_getter_name, true);
-        prop_getter_name = prop_getter_method.GetSelector().GetCString();
-      }
+    if (prop_getter_name && prop_getter_name[0] == '-') {
+      ObjCLanguage::MethodName prop_getter_method(prop_getter_name, true);
+      prop_getter_name = prop_getter_method.GetSelector().GetCString();
+    }
 
-      if (prop_setter_name && prop_setter_name[0] == '-') {
-        ObjCLanguage::MethodName prop_setter_method(prop_setter_name, true);
-        prop_setter_name = prop_setter_method.GetSelector().GetCString();
-      }
+    if (prop_setter_name && prop_setter_name[0] == '-') {
+      ObjCLanguage::MethodName prop_setter_method(prop_setter_name, true);
+      prop_setter_name = prop_setter_method.GetSelector().GetCString();
+    }
 
-      // If the names haven't been provided, they need to be filled in.
+    // If the names haven't been provided, they need to be filled in.
 
-      if (!prop_getter_name) {
-        prop_getter_name = prop_name;
-      }
-      if (!prop_setter_name && prop_name[0] &&
-          !(prop_attributes & DW_APPLE_PROPERTY_readonly)) {
-        StreamString ss;
+    if (!prop_getter_name) {
+      prop_getter_name = prop_name;
+    }
+    if (!prop_setter_name && prop_name[0] &&
+        !(prop_attributes & DW_APPLE_PROPERTY_readonly)) {
+      StreamString ss;
 
-        ss.Printf("set%c%s:", toupper(prop_name[0]), &prop_name[1]);
+      ss.Printf("set%c%s:", toupper(prop_name[0]), &prop_name[1]);
 
-        fixed_setter.SetString(ss.GetString());
-        prop_setter_name = fixed_setter.GetCString();
-      }
+      fixed_setter.SetString(ss.GetString());
+      prop_setter_name = fixed_setter.GetCString();
     }
+  }
 
-    // Clang has a DWARF generation bug where sometimes it represents
-    // fields that are references with bad byte size and bit size/offset
-    // information such as:
-    //
-    //  DW_AT_byte_size( 0x00 )
-    //  DW_AT_bit_size( 0x40 )
-    //  DW_AT_bit_offset( 0xffffffffffffffc0 )
-    //
-    // So check the bit offset to make sure it is sane, and if the values
-    // are not sane, remove them. If we don't do this then we will end up
-    // with a crash if we try to use this type in an expression when clang
-    // becomes unhappy with its recycled debug info.
+  // Clang has a DWARF generation bug where sometimes it represents
+  // fields that are references with bad byte size and bit size/offset
+  // information such as:
+  //
+  //  DW_AT_byte_size( 0x00 )
+  //  DW_AT_bit_size( 0x40 )
+  //  DW_AT_bit_offset( 0xffffffffffffffc0 )
+  //
+  // So check the bit offset to make sure it is sane, and if the values
+  // are not sane, remove them. If we don't do this then we will end up
+  // with a crash if we try to use this type in an expression when clang
+  // becomes unhappy with its recycled debug info.
 
-    if (byte_size.getValueOr(0) == 0 && bit_offset < 0) {
-      bit_size = 0;
-      bit_offset = 0;
-    }
+  if (byte_size.getValueOr(0) == 0 && bit_offset < 0) {
+    bit_size = 0;
+    bit_offset = 0;
+  }
 
-    const bool class_is_objc_object_or_interface =
-        TypeSystemClang::IsObjCObjectOrInterfaceType(class_clang_type);
+  const bool class_is_objc_object_or_interface =
+      TypeSystemClang::IsObjCObjectOrInterfaceType(class_clang_type);
 
-    // FIXME: Make Clang ignore Objective-C accessibility for expressions
-    if (class_is_objc_object_or_interface)
-      accessibility = eAccessNone;
+  // FIXME: Make Clang ignore Objective-C accessibility for expressions
+  if (class_is_objc_object_or_interface)
+    accessibility = eAccessNone;
 
-    // Handle static members
-    if (is_external && member_byte_offset == UINT32_MAX) {
-      Type *var_type = die.ResolveTypeUID(encoding_form.Reference());
+  // Handle static members
+  if (is_external && member_byte_offset == UINT32_MAX) {
+    Type *var_type = die.ResolveTypeUID(encoding_form.Reference());
 
-      if (var_type) {
-        if (accessibility == eAccessNone)
-          accessibility = eAccessPublic;
-        TypeSystemClang::AddVariableToRecordType(
-            class_clang_type, name, var_type->GetLayoutCompilerType(),
-            accessibility);
-      }
-      return;
+    if (var_type) {
+      if (accessibility == eAccessNone)
+        accessibility = eAccessPublic;
+      TypeSystemClang::AddVariableToRecordType(
+          class_clang_type, name, var_type->GetLayoutCompilerType(),
+          accessibility);
     }
+    return;
+  }
 
-    if (!is_artificial) {
-      Type *member_type = die.ResolveTypeUID(encoding_form.Reference());
-
-      clang::FieldDecl *field_decl = nullptr;
-      const uint64_t character_width = 8;
-      const uint64_t word_width = 32;
-      if (tag == DW_TAG_member) {
-        if (member_type) {
-          CompilerType member_clang_type = member_type->GetLayoutCompilerType();
+  if (!is_artificial) {
+    Type *member_type = die.ResolveTypeUID(encoding_form.Reference());
 
-          if (accessibility == eAccessNone)
-            accessibility = default_accessibility;
-          member_accessibilities.push_back(accessibility);
+    clang::FieldDecl *field_decl = nullptr;
+    const uint64_t character_width = 8;
+    const uint64_t word_width = 32;
+    if (tag == DW_TAG_member) {
+      if (member_type) {
+        CompilerType member_clang_type = member_type->GetLayoutCompilerType();
 
-          uint64_t field_bit_offset =
-              (member_byte_offset == UINT32_MAX ? 0 : (member_byte_offset * 8));
+        if (accessibility == eAccessNone)
+          accessibility = default_accessibility;
+        member_accessibilities.push_back(accessibility);
 
-          if (bit_size > 0) {
-            FieldInfo this_field_info;
-            this_field_info.bit_offset = field_bit_offset;
-            this_field_info.bit_size = bit_size;
+        uint64_t field_bit_offset =
+            (member_byte_offset == UINT32_MAX ? 0 : (member_byte_offset * 8));
 
-            if (data_bit_offset != UINT64_MAX) {
-              this_field_info.bit_offset = data_bit_offset;
-            } else {
-              if (!byte_size)
-                byte_size = member_type->GetByteSize();
+        if (bit_size > 0) {
+          FieldInfo this_field_info;
+          this_field_info.bit_offset = field_bit_offset;
+          this_field_info.bit_size = bit_size;
 
-              ObjectFile *objfile = die.GetDWARF()->GetObjectFile();
-              if (objfile->GetByteOrder() == eByteOrderLittle) {
-                this_field_info.bit_offset += byte_size.getValueOr(0) * 8;
-                this_field_info.bit_offset -= (bit_offset + bit_size);
-              } else {
-                this_field_info.bit_offset += bit_offset;
-              }
-            }
+          if (data_bit_offset != UINT64_MAX) {
+            this_field_info.bit_offset = data_bit_offset;
+          } else {
+            if (!byte_size)
+              byte_size = member_type->GetByteSize();
 
-            if ((this_field_info.bit_offset >= parent_bit_size) ||
-                (last_field_info.IsBitfield() &&
-                 !last_field_info.NextBitfieldOffsetIsValid(
-                     this_field_info.bit_offset))) {
-              ObjectFile *objfile = die.GetDWARF()->GetObjectFile();
-              objfile->GetModule()->ReportWarning(
-                  "0x%8.8" PRIx64 ": %s bitfield named \"%s\" has invalid "
-                  "bit offset (0x%8.8" PRIx64
-                  ") member will be ignored. Please file a bug against the "
-                  "compiler and include the preprocessed output for %s\n",
-                  die.GetID(), DW_TAG_value_to_name(tag), name,
-                  this_field_info.bit_offset, GetUnitName(parent_die).c_str());
-              return;
+            ObjectFile *objfile = die.GetDWARF()->GetObjectFile();
+            if (objfile->GetByteOrder() == eByteOrderLittle) {
+              this_field_info.bit_offset += byte_size.getValueOr(0) * 8;
+              this_field_info.bit_offset -= (bit_offset + bit_size);
+            } else {
+              this_field_info.bit_offset += bit_offset;
             }
+          }
 
-            // Update the field bit offset we will report for layout
-            field_bit_offset = this_field_info.bit_offset;
-
-            // Objective-C has invalid DW_AT_bit_offset values in older
-            // versions of clang, so we have to be careful and only insert
-            // unnamed bitfields if we have a new enough clang.
-            bool detect_unnamed_bitfields = true;
+          if ((this_field_info.bit_offset >= parent_bit_size) ||
+              (last_field_info.IsBitfield() &&
+               !last_field_info.NextBitfieldOffsetIsValid(
+                   this_field_info.bit_offset))) {
+            ObjectFile *objfile = die.GetDWARF()->GetObjectFile();
+            objfile->GetModule()->ReportWarning(
+                "0x%8.8" PRIx64 ": %s bitfield named \"%s\" has invalid "
+                "bit offset (0x%8.8" PRIx64
+                ") member will be ignored. Please file a bug against the "
+                "compiler and include the preprocessed output for %s\n",
+                die.GetID(), DW_TAG_value_to_name(tag), name,
+                this_field_info.bit_offset, GetUnitName(parent_die).c_str());
+            return;
+          }
 
-            if (class_is_objc_object_or_interface)
-              detect_unnamed_bitfields =
-                  die.GetCU()->Supports_unnamed_objc_bitfields();
+          // Update the field bit offset we will report for layout
+          field_bit_offset = this_field_info.bit_offset;
 
-            if (detect_unnamed_bitfields) {
-              clang::Optional unnamed_field_info;
-              uint64_t last_field_end = 0;
+          // Objective-C has invalid DW_AT_bit_offset values in older
+          // versions of clang, so we have to be careful and only insert
+          // unnamed bitfields if we have a new enough clang.
+          bool detect_unnamed_bitfields = true;
 
-              last_field_end =
-                  last_field_info.bit_offset + last_field_info.bit_size;
+          if (class_is_objc_object_or_interface)
+            detect_unnamed_bitfields =
+                die.GetCU()->Supports_unnamed_objc_bitfields();
 
-              if (!last_field_info.IsBitfield()) {
-                // The last field was not a bit-field...
-                // but if it did take up the entire word then we need to extend
-                // last_field_end so the bit-field does not step into the last
-                // fields padding.
-                if (last_field_end != 0 && ((last_field_end % word_width) != 0))
-                  last_field_end += word_width - (last_field_end % word_width);
-              }
+          if (detect_unnamed_bitfields) {
+            clang::Optional unnamed_field_info;
+            uint64_t last_field_end = 0;
 
-              // If we have a gap between the last_field_end and the current
-              // field we have an unnamed bit-field.
-              // If we have a base class, we assume there is no unnamed
-              // bit-field if this is the first field since the gap can be
-              // attributed to the members from the base class. This assumption
-              // is not correct if the first field of the derived class is
-              // indeed an unnamed bit-field. We currently do not have the
-              // machinary to track the offset of the last field of classes we
-              // have seen before, so we are not handling this case.
-              if (this_field_info.bit_offset != last_field_end &&
-                  this_field_info.bit_offset > last_field_end &&
-                  !(last_field_info.bit_offset == 0 &&
-                    last_field_info.bit_size == 0 &&
-                    layout_info.base_offsets.size() != 0)) {
-                unnamed_field_info = FieldInfo{};
-                unnamed_field_info->bit_size =
-                    this_field_info.bit_offset - last_field_end;
-                unnamed_field_info->bit_offset = last_field_end;
-              }
+            last_field_end =
+                last_field_info.bit_offset + last_field_info.bit_size;
 
-              if (unnamed_field_info) {
-                clang::FieldDecl *unnamed_bitfield_decl =
-                    TypeSystemClang::AddFieldToRecordType(
-                        class_clang_type, llvm::StringRef(),
-                        m_ast.GetBuiltinTypeForEncodingAndBitSize(eEncodingSint,
-                                                                  word_width),
-                        accessibility, unnamed_field_info->bit_size);
+            if (!last_field_info.IsBitfield()) {
+              // The last field was not a bit-field...
+              // but if it did take up the entire word then we need to extend
+              // last_field_end so the bit-field does not step into the last
+              // fields padding.
+              if (last_field_end != 0 && ((last_field_end % word_width) != 0))
+                last_field_end += word_width - (last_field_end % word_width);
+            }
 
-                layout_info.field_offsets.insert(std::make_pair(
-                    unnamed_bitfield_decl, unnamed_field_info->bit_offset));
-              }
+            // If we have a gap between the last_field_end and the current
+            // field we have an unnamed bit-field.
+            // If we have a base class, we assume there is no unnamed
+            // bit-field if this is the first field since the gap can be
+            // attributed to the members from the base class. This assumption
+            // is not correct if the first field of the derived class is
+            // indeed an unnamed bit-field. We currently do not have the
+            // machinery to track the offset of the last field of classes we
+            // have seen before, so we are not handling this case.
+            if (this_field_info.bit_offset != last_field_end &&
+                this_field_info.bit_offset > last_field_end &&
+                !(last_field_info.bit_offset == 0 &&
+                  last_field_info.bit_size == 0 &&
+                  layout_info.base_offsets.size() != 0)) {
+              unnamed_field_info = FieldInfo{};
+              unnamed_field_info->bit_size =
+                  this_field_info.bit_offset - last_field_end;
+              unnamed_field_info->bit_offset = last_field_end;
             }
 
-            last_field_info = this_field_info;
-            last_field_info.SetIsBitfield(true);
-          } else {
-            last_field_info.bit_offset = field_bit_offset;
+            if (unnamed_field_info) {
+              clang::FieldDecl *unnamed_bitfield_decl =
+                  TypeSystemClang::AddFieldToRecordType(
+                      class_clang_type, llvm::StringRef(),
+                      m_ast.GetBuiltinTypeForEncodingAndBitSize(eEncodingSint,
+                                                                word_width),
+                      accessibility, unnamed_field_info->bit_size);
 
-            if (llvm::Optional clang_type_size =
-                    member_clang_type.GetByteSize(nullptr)) {
-              last_field_info.bit_size = *clang_type_size * character_width;
+              layout_info.field_offsets.insert(std::make_pair(
+                  unnamed_bitfield_decl, unnamed_field_info->bit_offset));
             }
+          }
 
-            last_field_info.SetIsBitfield(false);
+          last_field_info = this_field_info;
+          last_field_info.SetIsBitfield(true);
+        } else {
+          last_field_info.bit_offset = field_bit_offset;
+
+          if (llvm::Optional clang_type_size =
+                  member_clang_type.GetByteSize(nullptr)) {
+            last_field_info.bit_size = *clang_type_size * character_width;
           }
 
-          if (!member_clang_type.IsCompleteType())
-            member_clang_type.GetCompleteType();
-
-          {
-            // Older versions of clang emit array[0] and array[1] in the
-            // same way (). If the current field
-            // is at the end of the structure, then there is definitely no
-            // room for extra elements and we override the type to
-            // array[0].
-
-            CompilerType member_array_element_type;
-            uint64_t member_array_size;
-            bool member_array_is_incomplete;
-
-            if (member_clang_type.IsArrayType(&member_array_element_type,
-                                              &member_array_size,
-                                              &member_array_is_incomplete) &&
-                !member_array_is_incomplete) {
-              uint64_t parent_byte_size =
-                  parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size,
-                                                         UINT64_MAX);
-
-              if (member_byte_offset >= parent_byte_size) {
-                if (member_array_size != 1 &&
-                    (member_array_size != 0 ||
-                     member_byte_offset > parent_byte_size)) {
-                  module_sp->ReportError(
-                      "0x%8.8" PRIx64
-                      ": DW_TAG_member '%s' refers to type 0x%8.8x"
-                      " which extends beyond the bounds of 0x%8.8" PRIx64,
-                      die.GetID(), name, encoding_form.Reference().GetOffset(),
-                      parent_die.GetID());
-                }
+          last_field_info.SetIsBitfield(false);
+        }
 
-                member_clang_type =
-                    m_ast.CreateArrayType(member_array_element_type, 0, false);
+        if (!member_clang_type.IsCompleteType())
+          member_clang_type.GetCompleteType();
+
+        {
+          // Older versions of clang emit array[0] and array[1] in the
+          // same way (). If the current field
+          // is at the end of the structure, then there is definitely no
+          // room for extra elements and we override the type to
+          // array[0].
+
+          CompilerType member_array_element_type;
+          uint64_t member_array_size;
+          bool member_array_is_incomplete;
+
+          if (member_clang_type.IsArrayType(&member_array_element_type,
+                                            &member_array_size,
+                                            &member_array_is_incomplete) &&
+              !member_array_is_incomplete) {
+            uint64_t parent_byte_size =
+                parent_die.GetAttributeValueAsUnsigned(DW_AT_byte_size,
+                                                       UINT64_MAX);
+
+            if (member_byte_offset >= parent_byte_size) {
+              if (member_array_size != 1 &&
+                  (member_array_size != 0 ||
+                   member_byte_offset > parent_byte_size)) {
+                module_sp->ReportError(
+                    "0x%8.8" PRIx64
+                    ": DW_TAG_member '%s' refers to type 0x%8.8x"
+                    " which extends beyond the bounds of 0x%8.8" PRIx64,
+                    die.GetID(), name, encoding_form.Reference().GetOffset(),
+                    parent_die.GetID());
               }
+
+              member_clang_type =
+                  m_ast.CreateArrayType(member_array_element_type, 0, false);
             }
           }
+        }
 
-          CompleteType(member_clang_type);
+        CompleteType(member_clang_type);
 
-          field_decl = TypeSystemClang::AddFieldToRecordType(
-              class_clang_type, name, member_clang_type, accessibility,
-              bit_size);
+        field_decl = TypeSystemClang::AddFieldToRecordType(
+            class_clang_type, name, member_clang_type, accessibility,
+            bit_size);
 
-          m_ast.SetMetadataAsUserID(field_decl, die.GetID());
+        m_ast.SetMetadataAsUserID(field_decl, die.GetID());
 
-          layout_info.field_offsets.insert(
-              std::make_pair(field_decl, field_bit_offset));
-        } else {
-          if (name)
-            module_sp->ReportError(
-                "0x%8.8" PRIx64 ": DW_TAG_member '%s' refers to type 0x%8.8x"
-                " which was unable to be parsed",
-                die.GetID(), name, encoding_form.Reference().GetOffset());
-          else
-            module_sp->ReportError(
-                "0x%8.8" PRIx64 ": DW_TAG_member refers to type 0x%8.8x"
-                " which was unable to be parsed",
-                die.GetID(), encoding_form.Reference().GetOffset());
-        }
+        layout_info.field_offsets.insert(
+            std::make_pair(field_decl, field_bit_offset));
+      } else {
+        if (name)
+          module_sp->ReportError(
+              "0x%8.8" PRIx64 ": DW_TAG_member '%s' refers to type 0x%8.8x"
+              " which was unable to be parsed",
+              die.GetID(), name, encoding_form.Reference().GetOffset());
+        else
+          module_sp->ReportError(
+              "0x%8.8" PRIx64 ": DW_TAG_member refers to type 0x%8.8x"
+              " which was unable to be parsed",
+              die.GetID(), encoding_form.Reference().GetOffset());
       }
+    }
 
-      if (prop_name != nullptr && member_type) {
-        clang::ObjCIvarDecl *ivar_decl = nullptr;
+    if (prop_name != nullptr && member_type) {
+      clang::ObjCIvarDecl *ivar_decl = nullptr;
 
-        if (field_decl) {
-          ivar_decl = clang::dyn_cast(field_decl);
-          assert(ivar_decl != nullptr);
-        }
+      if (field_decl) {
+        ivar_decl = clang::dyn_cast(field_decl);
+        assert(ivar_decl != nullptr);
+      }
 
-        ClangASTMetadata metadata;
-        metadata.SetUserID(die.GetID());
-        delayed_properties.push_back(DelayedAddObjCClassProperty(
-            class_clang_type, prop_name, member_type->GetLayoutCompilerType(),
-            ivar_decl, prop_setter_name, prop_getter_name, prop_attributes,
-            &metadata));
+      ClangASTMetadata metadata;
+      metadata.SetUserID(die.GetID());
+      delayed_properties.push_back(DelayedAddObjCClassProperty(
+          class_clang_type, prop_name, member_type->GetLayoutCompilerType(),
+          ivar_decl, prop_setter_name, prop_getter_name, prop_attributes,
+          &metadata));
 
-        if (ivar_decl)
-          m_ast.SetMetadataAsUserID(ivar_decl, die.GetID());
-      }
+      if (ivar_decl)
+        m_ast.SetMetadataAsUserID(ivar_decl, die.GetID());
     }
   }
 }

From 84a170178c4431b7536c83ff0e5ce80774d08df6 Mon Sep 17 00:00:00 2001
From: Georgii Rymar 
Date: Wed, 8 Jul 2020 14:54:45 +0300
Subject: [PATCH 086/771] [llvm-readobj] - Add a generic test for
 --dyn-relocations and fix an issue.

We have an issue currently: --dyn-relocations always prints the following
relocation header when dumping `DynPLTRelRegion`:

"Offset  Info  Type Symbol's Value  Symbol's Name + Addend"

I.e. even for an empty object, --dyn-relocations still prints this.
It is an easy-to-fix bug, but we have no dedicated test case for this option.
(we have a dynamic-reloc-no-section-headers.test, which has a slightly different purpose).

This patch adds a test and fixes the behavior.

Differential revision: https://reviews.llvm.org/D83387
---
 .../tools/llvm-readobj/ELF/dynamic-reloc.test | 135 ++++++++++++++++++
 llvm/tools/llvm-readobj/ELFDumper.cpp         |  27 ++--
 2 files changed, 149 insertions(+), 13 deletions(-)
 create mode 100644 llvm/test/tools/llvm-readobj/ELF/dynamic-reloc.test

diff --git a/llvm/test/tools/llvm-readobj/ELF/dynamic-reloc.test b/llvm/test/tools/llvm-readobj/ELF/dynamic-reloc.test
new file mode 100644
index 0000000000000..79faebadb40a4
--- /dev/null
+++ b/llvm/test/tools/llvm-readobj/ELF/dynamic-reloc.test
@@ -0,0 +1,135 @@
+## Test that we are able to print dynamic relocations with --dyn-relocations.
+
+## Check what we print when there are no dynamic relocations in an object.
+# RUN: yaml2obj --docnum=1 %s -o %t1
+# RUN: llvm-readobj --dyn-relocations %t1 2>&1 | FileCheck %s --check-prefix=LLVM-NONE
+# RUN: llvm-readelf --dyn-relocations %t1 2>&1 | FileCheck %s --implicit-check-not={{.}} --allow-empty
+
+# LLVM-NONE:      Dynamic Relocations {
+# LLVM-NONE-NEXT: }
+# LLVM-NONE-NOT:  {{.}}
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_DYN
+  Machine: EM_X86_64
+
+## Check that we dump all possible dynamic relocation sections.
+# RUN: yaml2obj --docnum=2 %s -o %t2.1
+# RUN: llvm-readobj --dyn-relocations %t2.1 2>&1 | \
+# RUN:   FileCheck %s --implicit-check-not=warning: --check-prefix=LLVM-RELOCS
+# RUN: llvm-readelf --dyn-relocations %t2.1 2>&1 | \
+# RUN:   FileCheck %s --implicit-check-not=warning: --strict-whitespace \
+# RUN:     --match-full-lines --check-prefixes=GNU-RELOCS,GNU-PLTREL
+
+## 7 == DT_RELA.
+# RUN: yaml2obj --docnum=2 %s -DDTPLTREL=7 -DPLTTYPE=SHT_RELA -DPLTRELSZ=0x18 -o %t2.2
+# RUN: llvm-readobj --dyn-relocations %t2.2 2>&1 | \
+# RUN:   FileCheck %s --implicit-check-not=warning: --check-prefix=LLVM-RELOCS
+# RUN: llvm-readelf --dyn-relocations %t2.2 2>&1 | \
+# RUN:   FileCheck %s --implicit-check-not=warning: --strict-whitespace \
+# RUN:     --match-full-lines --check-prefixes=GNU-RELOCS,GNU-PLTRELA
+
+# LLVM-RELOCS:      Dynamic Relocations {
+# LLVM-RELOCS-NEXT:   0x1 R_X86_64_NONE foo 0x0
+# LLVM-RELOCS-NEXT:   0x2 R_X86_64_NONE foo 0x0
+# LLVM-RELOCS-NEXT:   0x4 R_X86_64_RELATIVE - 0x0
+# LLVM-RELOCS-NEXT:   0x8 R_X86_64_NONE foo 0x0
+# LLVM-RELOCS-NEXT: }
+
+#       GNU-RELOCS:'RELA' relocation section at offset 0x78 contains 24 bytes:
+#  GNU-RELOCS-NEXT:    Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+#  GNU-RELOCS-NEXT:0000000000000001  0000000100000000 R_X86_64_NONE          0000000000000000 foo + 0
+# GNU-RELOCS-EMPTY:
+#  GNU-RELOCS-NEXT:'REL' relocation section at offset 0x90 contains 16 bytes:
+#  GNU-RELOCS-NEXT:    Offset             Info             Type               Symbol's Value  Symbol's Name
+#  GNU-RELOCS-NEXT:0000000000000002  0000000100000000 R_X86_64_NONE          0000000000000000 foo
+# GNU-RELOCS-EMPTY:
+#  GNU-RELOCS-NEXT:'RELR' relocation section at offset 0xa0 contains 8 bytes:
+#  GNU-RELOCS-NEXT:    Offset             Info             Type               Symbol's Value  Symbol's Name
+#  GNU-RELOCS-NEXT:0000000000000004  0000000000000008 R_X86_64_RELATIVE                 {{$}}
+# GNU-RELOCS-EMPTY:
+#  GNU-PLTREL-NEXT:'PLT' relocation section at offset 0xa8 contains 16 bytes:
+#  GNU-PLTREL-NEXT:    Offset             Info             Type               Symbol's Value  Symbol's Name
+#  GNU-PLTREL-NEXT:0000000000000008  0000000100000000 R_X86_64_NONE          0000000000000000 foo
+# GNU-PLTRELA-NEXT:'PLT' relocation section at offset 0xa8 contains 24 bytes:
+# GNU-PLTRELA-NEXT:    Offset             Info             Type               Symbol's Value  Symbol's Name + Addend
+# GNU-PLTRELA-NEXT:0000000000000008  0000000100000000 R_X86_64_NONE          0000000000000000 foo + 0
+# GNU-RELOCS-EMPTY:
+#   GNU-RELOCS-NOT:{{.}}
+
+--- !ELF
+FileHeader:
+  Class:   ELFCLASS64
+  Data:    ELFDATA2LSB
+  Type:    ET_DYN
+  Machine: EM_X86_64
+Sections:
+  - Name: .rela.dyn
+    Type: SHT_RELA
+    Relocations:
+      - Type:   R_X86_64_NONE
+        Offset: 0x1
+        Symbol: foo
+  - Name: .rel.dyn
+    Type: SHT_REL
+    Relocations:
+      - Type:   R_X86_64_NONE
+        Offset: 0x2
+        Symbol: foo
+  - Name:    .relr.dyn
+    Type:    SHT_RELR
+    Flags:   [ SHF_ALLOC ]
+    Entries: [ 0x0000000000000004 ]
+  - Name:    .plt
+    Type:    [[PLTTYPE=SHT_REL]]
+    Relocations:
+      - Type:   R_X86_64_NONE
+        Offset: 0x8
+        Symbol: foo
+  - Name: .dynamic
+    Type: SHT_DYNAMIC
+    Entries:
+      - Tag:   DT_RELA
+        Value: 0x0
+      - Tag:   DT_RELASZ
+        Value: 0x18
+      - Tag:   DT_RELAENT
+        Value: 0x18
+## 0x18 == offset of .rel.dyn in the segment.
+      - Tag:   DT_REL
+        Value: 0x18
+      - Tag:   DT_RELSZ
+        Value: 0x10
+      - Tag:   DT_RELENT
+        Value: 0x10
+## 0x28 == offset of .relr.dyn section in the segment.
+      - Tag:   DT_RELR
+        Value: 0x28
+      - Tag:   DT_RELRSZ
+        Value: 0x8
+      - Tag:   DT_RELRENT
+        Value: 0x8
+## 0x30 == offset of .plt section in the segment.
+      - Tag:   DT_JMPREL
+        Value: 0x30
+      - Tag:   DT_PLTREL
+        Value: [[DTPLTREL=17]] ## 17 == DT_REL
+      - Tag:   DT_PLTRELSZ
+        Value: [[PLTRELSZ=0x10]]
+      - Tag:   DT_NULL
+        Value: 0x0
+Symbols:
+  - Name: foo
+DynamicSymbols:
+  - Name: foo
+ProgramHeaders:
+  - Type:  PT_LOAD
+    Sections:
+      - Section: .rela.dyn
+      - Section: .rel.dyn
+      - Section: .relr.dyn
+      - Section: .plt
+      - Section: .dynamic
diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp
index 6a7f37e39a9ab..56528d321de67 100644
--- a/llvm/tools/llvm-readobj/ELFDumper.cpp
+++ b/llvm/tools/llvm-readobj/ELFDumper.cpp
@@ -4568,19 +4568,20 @@ void GNUStyle::printDynamicRelocations(const ELFO *Obj) {
                          Obj->base(),
                      1)
        << " contains " << DynPLTRelRegion.Size << " bytes:\n";
-  }
-  if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) {
-    printRelocHeader(ELF::SHT_RELA);
-    for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef())
-      printDynamicRelocation(Obj, Rela, true);
-  } else {
-    printRelocHeader(ELF::SHT_REL);
-    for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef()) {
-      Elf_Rela Rela;
-      Rela.r_offset = Rel.r_offset;
-      Rela.r_info = Rel.r_info;
-      Rela.r_addend = 0;
-      printDynamicRelocation(Obj, Rela, false);
+
+    if (DynPLTRelRegion.EntSize == sizeof(Elf_Rela)) {
+      printRelocHeader(ELF::SHT_RELA);
+      for (const Elf_Rela &Rela : DynPLTRelRegion.getAsArrayRef())
+        printDynamicRelocation(Obj, Rela, true);
+    } else {
+      printRelocHeader(ELF::SHT_REL);
+      for (const Elf_Rel &Rel : DynPLTRelRegion.getAsArrayRef()) {
+        Elf_Rela Rela;
+        Rela.r_offset = Rel.r_offset;
+        Rela.r_info = Rel.r_info;
+        Rela.r_addend = 0;
+        printDynamicRelocation(Obj, Rela, false);
+      }
     }
   }
 }

From 38998cfa9c1e887636a7ca7278b71fde5b19dd0e Mon Sep 17 00:00:00 2001
From: Mirko Brkusanin 
Date: Mon, 13 Jul 2020 13:35:34 +0200
Subject: [PATCH 087/771] [AMDGPU][GlobalISel] Fix subregister index for EXEC
 register in selectBallot.

Temporarily remove subregister for EXEC in selectBallot added in
https://reviews.llvm.org/D83214 to fix failures on expensive checks buildbot.
---
 llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp         | 4 +---
 .../test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll | 2 +-
 .../test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll | 5 +++--
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
index 84734365cc658..2025c0fa5d21b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp
@@ -1061,9 +1061,7 @@ bool AMDGPUInstructionSelector::selectBallot(MachineInstr &I) const {
       BuildMI(*BB, &I, DL, TII.get(Opcode), DstReg).addImm(0);
     } else if (Value == -1) { // all ones
       Register SrcReg = Is64 ? AMDGPU::EXEC : AMDGPU::EXEC_LO;
-      const unsigned SubReg = Is64 ? AMDGPU::sub0_sub1 : AMDGPU::sub0;
-      BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg)
-          .addReg(SrcReg, 0, SubReg);
+      BuildMI(*BB, &I, DL, TII.get(AMDGPU::COPY), DstReg).addReg(SrcReg);
     } else
       return false;
   } else {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
index b15fbf64fd8e2..6627804bdf76a 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i32.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -global-isel < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx1010 -mattr=+wavefrontsize32,-wavefrontsize64 -global-isel -verify-machineinstrs < %s | FileCheck %s
 
 declare i32 @llvm.amdgcn.ballot.i32(i1)
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
index fcea5f8c9c59e..5f5af2954ff56 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ballot.i64.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel < %s | FileCheck %s
+; RUN: llc -march=amdgcn -mcpu=gfx900 -global-isel -verify-machineinstrs < %s | FileCheck %s
 
 declare i64 @llvm.amdgcn.ballot.i64(i1)
 
@@ -20,7 +20,8 @@ define amdgpu_cs i64 @constant_false() {
 define amdgpu_cs i64 @constant_true() {
 ; CHECK-LABEL: constant_true:
 ; CHECK:       ; %bb.0:
-; CHECK-NEXT:    s_mov_b64 s[0:1], exec
+; CHECK-NEXT:    s_mov_b32 s0, exec_lo
+; CHECK-NEXT:    s_mov_b32 s1, exec_hi
 ; CHECK-NEXT:    ; return to shader part epilog
   %ballot = call i64 @llvm.amdgcn.ballot.i64(i1 1)
   ret i64 %ballot

From e73d0b5719966ddbeff7a3da70a3cb782c3733ed Mon Sep 17 00:00:00 2001
From: Hans Wennborg 
Date: Thu, 9 Jul 2020 14:54:53 +0200
Subject: [PATCH 088/771] [COFF] Error on unexpected .pdata size

Previously, lld would crash if the .pdata size was not an even multiple
of the expected .pdata entry size. This makes it error gracefully instead.

(We hit this in Chromium due to an assembler problem: https://crbug.com/1101577)

Differential revision: https://reviews.llvm.org/D83479
---
 lld/COFF/Writer.cpp                |  8 +++
 lld/test/COFF/pdata-arm64-bad.yaml | 89 ++++++++++++++++++++++++++++++
 2 files changed, 97 insertions(+)
 create mode 100644 lld/test/COFF/pdata-arm64-bad.yaml

diff --git a/lld/COFF/Writer.cpp b/lld/COFF/Writer.cpp
index ffa0a0006f0ed..3bcc1777f7ac8 100644
--- a/lld/COFF/Writer.cpp
+++ b/lld/COFF/Writer.cpp
@@ -1864,6 +1864,10 @@ void Writer::sortExceptionTable() {
   uint8_t *end = bufAddr(lastPdata) + lastPdata->getSize();
   if (config->machine == AMD64) {
     struct Entry { ulittle32_t begin, end, unwind; };
+    if ((end - begin) % sizeof(Entry) != 0) {
+      fatal("unexpected .pdata size: " + Twine(end - begin) +
+            " is not a multiple of " + Twine(sizeof(Entry)));
+    }
     parallelSort(
         MutableArrayRef((Entry *)begin, (Entry *)end),
         [](const Entry &a, const Entry &b) { return a.begin < b.begin; });
@@ -1871,6 +1875,10 @@ void Writer::sortExceptionTable() {
   }
   if (config->machine == ARMNT || config->machine == ARM64) {
     struct Entry { ulittle32_t begin, unwind; };
+    if ((end - begin) % sizeof(Entry) != 0) {
+      fatal("unexpected .pdata size: " + Twine(end - begin) +
+            " is not a multiple of " + Twine(sizeof(Entry)));
+    }
     parallelSort(
         MutableArrayRef((Entry *)begin, (Entry *)end),
         [](const Entry &a, const Entry &b) { return a.begin < b.begin; });
diff --git a/lld/test/COFF/pdata-arm64-bad.yaml b/lld/test/COFF/pdata-arm64-bad.yaml
new file mode 100644
index 0000000000000..d6b4967457952
--- /dev/null
+++ b/lld/test/COFF/pdata-arm64-bad.yaml
@@ -0,0 +1,89 @@
+# RUN: yaml2obj < %s > %t.obj
+# RUN: not lld-link /out:%t.exe /entry:func1 /subsystem:console %t.obj 2>&1 | FileCheck %s
+
+# This file is like pdata-arm64.yaml, except that .pdata has been extended with
+# 4 bytes. This can happen due to for example bad assembler input. Check that
+# lld errors gracefully instead of crashing.
+
+# CHECK: unexpected .pdata size: 20 is not a multiple of 8
+
+--- !COFF
+header:
+  Machine:         IMAGE_FILE_MACHINE_ARM64
+  Characteristics: [  ]
+sections:
+  - Name:            .text
+    Characteristics: [ IMAGE_SCN_CNT_CODE, IMAGE_SCN_MEM_EXECUTE, IMAGE_SCN_MEM_READ ]
+    Alignment:       4
+    SectionData:     ff4300d1f37b00a9f303012a00000094e003132a00000094f37b40a9ff430091c0035fd6f353bea9fe0b00f9f303012af403022a00000094e003132a00000094e003142a00000094fe0b40f9f353c2a8c0035fd6c0035fd6
+    Relocations:
+      - VirtualAddress:  12
+        SymbolName:      func3
+        Type:            IMAGE_REL_ARM64_BRANCH26
+      - VirtualAddress:  20
+        SymbolName:      func3
+        Type:            IMAGE_REL_ARM64_BRANCH26
+      - VirtualAddress:  52
+        SymbolName:      func3
+        Type:            IMAGE_REL_ARM64_BRANCH26
+      - VirtualAddress:  60
+        SymbolName:      func3
+        Type:            IMAGE_REL_ARM64_BRANCH26
+      - VirtualAddress:  68
+        SymbolName:      func3
+        Type:            IMAGE_REL_ARM64_BRANCH26
+  - Name:            .pdata
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+    Alignment:       4
+    SectionData:     0000000031002201000000002500a10000000000
+    Relocations:
+      - VirtualAddress:  0
+        SymbolName:      func2
+        Type:            IMAGE_REL_ARM64_ADDR32NB
+      - VirtualAddress:  8
+        SymbolName:      func1
+        Type:            IMAGE_REL_ARM64_ADDR32NB
+symbols:
+  - Name:            .text
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          57
+      NumberOfRelocations: 0
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          1
+  - Name:            .pdata
+    Value:           0
+    SectionNumber:   2
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_STATIC
+    SectionDefinition:
+      Length:          20
+      NumberOfRelocations: 2
+      NumberOfLinenumbers: 0
+      CheckSum:        0
+      Number:          2
+  - Name:            func1
+    Value:           0
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+  - Name:            func2
+    Value:           36
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_FUNCTION
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+  - Name:            func3
+    Value:           84
+    SectionNumber:   1
+    SimpleType:      IMAGE_SYM_TYPE_NULL
+    ComplexType:     IMAGE_SYM_DTYPE_NULL
+    StorageClass:    IMAGE_SYM_CLASS_EXTERNAL
+...

From fbb6c9df28ccc8c35d39ea56707f71a3b2619071 Mon Sep 17 00:00:00 2001
From: Georgii Rymar 
Date: Mon, 13 Jul 2020 15:14:06 +0300
Subject: [PATCH 089/771] [LLD][ELF] - Fix the test after llvm-readelf output
 change.

An issue for llvm-readelf was fixed in
https://reviews.llvm.org/rG84a170178c4431b7536c83ff0e5ce80774d08df6

Now it produces no output for this test.
---
 lld/test/ELF/ppc64-rel-so-local-calls.s | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/lld/test/ELF/ppc64-rel-so-local-calls.s b/lld/test/ELF/ppc64-rel-so-local-calls.s
index 2bc89d554a022..3d2e0673c3a74 100644
--- a/lld/test/ELF/ppc64-rel-so-local-calls.s
+++ b/lld/test/ELF/ppc64-rel-so-local-calls.s
@@ -2,15 +2,11 @@
 
 // RUN: llvm-mc -filetype=obj -triple=powerpc64le-unknown-linux %s -o %t.o
 // RUN: ld.lld -shared %t.o -o %t.so
-// RUN: llvm-readelf -dyn-relocations %t.so | FileCheck %s
+// RUN: llvm-readelf -dyn-relocations %t.so | FileCheck %s -allow-empty --implicit-check-not={{.}}
 
 // RUN: llvm-mc -filetype=obj -triple=powerpc64-unknown-linux %s -o %t.o
 // RUN: ld.lld -shared %t.o -o %t.so
-// RUN: llvm-readelf -dyn-relocations %t.so | FileCheck %s
-
-
-// CHECK-NOT: foo
-// CHECK-NOT: bar
+// RUN: llvm-readelf -dyn-relocations %t.so | FileCheck %s -allow-empty --implicit-check-not={{.}}
 
 	.text
 	.abiversion 2

From d96a47c61625f853ec42a151ae3783e30a3943f3 Mon Sep 17 00:00:00 2001
From: Balazs Benics 
Date: Mon, 13 Jul 2020 14:29:47 +0200
Subject: [PATCH 090/771] [analyzer] ctu-on-demand-parsing tests: replace linux
 -> system-linux

Differential Revision: https://reviews.llvm.org/D83555
---
 clang/test/Analysis/ctu-on-demand-parsing.c   | 2 +-
 clang/test/Analysis/ctu-on-demand-parsing.cpp | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/clang/test/Analysis/ctu-on-demand-parsing.c b/clang/test/Analysis/ctu-on-demand-parsing.c
index 5adce7f369639..07a72a1046467 100644
--- a/clang/test/Analysis/ctu-on-demand-parsing.c
+++ b/clang/test/Analysis/ctu-on-demand-parsing.c
@@ -19,7 +19,7 @@
 // RUN:   -verify ctu-on-demand-parsing.c
 //
 // FIXME: Path handling should work on all platforms.
-// REQUIRES: linux
+// REQUIRES: system-linux
 
 void clang_analyzer_eval(int);
 
diff --git a/clang/test/Analysis/ctu-on-demand-parsing.cpp b/clang/test/Analysis/ctu-on-demand-parsing.cpp
index 058269662fb3a..e4e998c8f64c3 100644
--- a/clang/test/Analysis/ctu-on-demand-parsing.cpp
+++ b/clang/test/Analysis/ctu-on-demand-parsing.cpp
@@ -30,7 +30,7 @@
 // CHECK: CTU loaded AST file: {{.*}}ctu-chain.cpp
 //
 // FIXME: Path handling should work on all platforms.
-// REQUIRES: linux
+// REQUIRES: system-linux
 
 #include "ctu-hdr.h"
 

From 595270ae39671eed49b75983beeab13de74a342b Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer 
Date: Mon, 13 Jul 2020 11:53:09 +0100
Subject: [PATCH 091/771] [ARM][MVE] Refactor option
 -disable-mve-tail-predication

This refactors option -disable-mve-tail-predication to take different arguments
so that we have 1 option to control tail-predication rather than several
different ones.

This is also a prep step for D82953, in which we want to reject reductions
unless that is requested with this option.

Differential Revision: https://reviews.llvm.org/D83133
---
 .../lib/Target/ARM/ARMTargetTransformInfo.cpp |  6 ++--
 llvm/lib/Target/ARM/ARMTargetTransformInfo.h  | 10 ++++++
 llvm/lib/Target/ARM/MVETailPredication.cpp    | 35 +++++++++++++------
 .../LowOverheadLoops/basic-tail-pred.ll       |  2 +-
 .../LowOverheadLoops/clear-maskedinsts.ll     |  2 +-
 .../cond-vector-reduce-mve-codegen.ll         |  2 +-
 .../LowOverheadLoops/extending-loads.ll       |  2 +-
 .../Thumb2/LowOverheadLoops/fast-fp-loops.ll  |  2 +-
 .../LowOverheadLoops/lsr-profitable-chain.ll  |  2 +-
 .../LowOverheadLoops/mve-tail-data-types.ll   |  2 +-
 .../CodeGen/Thumb2/LowOverheadLoops/nested.ll |  2 +-
 .../Thumb2/LowOverheadLoops/reductions.ll     |  2 +-
 .../LowOverheadLoops/tail-pred-const.ll       |  2 +-
 .../tail-pred-intrinsic-add-sat.ll            |  3 +-
 .../tail-pred-intrinsic-fabs.ll               |  3 +-
 .../tail-pred-intrinsic-round.ll              |  3 +-
 .../tail-pred-intrinsic-sub-sat.ll            |  3 +-
 .../LowOverheadLoops/tail-pred-narrow.ll      |  2 +-
 .../tail-pred-pattern-fail.ll                 |  2 +-
 .../LowOverheadLoops/tail-pred-widen.ll       |  2 +-
 .../Thumb2/LowOverheadLoops/tail-reduce.ll    |  6 ++--
 .../varying-outer-2d-reduction.ll             |  2 +-
 .../LowOverheadLoops/vector-arith-codegen.ll  |  2 +-
 .../vector-reduce-mve-tail.ll                 |  2 +-
 .../Thumb2/LowOverheadLoops/vector-unroll.ll  |  2 +-
 llvm/test/CodeGen/Thumb2/mve-fma-loops.ll     |  2 +-
 .../ARM/prefer-tail-loop-folding.ll           | 16 ++++-----
 .../ARM/tail-folding-counting-down.ll         |  4 +--
 .../LoopVectorize/ARM/tail-loop-folding.ll    |  4 +--
 29 files changed, 79 insertions(+), 50 deletions(-)

diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
index 74b1331216a05..575e6171059da 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp
@@ -45,7 +45,7 @@ static cl::opt DisableLowOverheadLoops(
   "disable-arm-loloops", cl::Hidden, cl::init(false),
   cl::desc("Disable the generation of low-overhead loops"));
 
-extern cl::opt DisableTailPredication;
+extern cl::opt EnableTailPredication;
 
 extern cl::opt EnableMaskedGatherScatters;
 
@@ -1458,7 +1458,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
                                              TargetLibraryInfo *TLI,
                                              DominatorTree *DT,
                                              const LoopAccessInfo *LAI) {
-  if (DisableTailPredication)
+  if (!EnableTailPredication)
     return false;
 
   // Creating a predicated vector loop is the first step for generating a
@@ -1501,7 +1501,7 @@ bool ARMTTIImpl::preferPredicateOverEpilogue(Loop *L, LoopInfo *LI,
 }
 
 bool ARMTTIImpl::emitGetActiveLaneMask() const {
-  if (!ST->hasMVEIntegerOps() || DisableTailPredication)
+  if (!ST->hasMVEIntegerOps() || !EnableTailPredication)
     return false;
 
   // Intrinsic @llvm.get.active.lane.mask is supported.
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
index 537a546361eeb..7bf6de4bffe07 100644
--- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
+++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.h
@@ -38,6 +38,16 @@ class ScalarEvolution;
 class Type;
 class Value;
 
+namespace TailPredication {
+  enum Mode {
+    Disabled = 0,
+    EnabledNoReductions,
+    Enabled,
+    ForceEnabledNoReductions,
+    ForceEnabled
+  };
+}
+
 class ARMTTIImpl : public BasicTTIImplBase {
   using BaseT = BasicTTIImplBase;
   using TTI = TargetTransformInfo;
diff --git a/llvm/lib/Target/ARM/MVETailPredication.cpp b/llvm/lib/Target/ARM/MVETailPredication.cpp
index 6583dcb77e1ed..5bf3522ab2e64 100644
--- a/llvm/lib/Target/ARM/MVETailPredication.cpp
+++ b/llvm/lib/Target/ARM/MVETailPredication.cpp
@@ -42,6 +42,7 @@
 
 #include "ARM.h"
 #include "ARMSubtarget.h"
+#include "ARMTargetTransformInfo.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
@@ -64,16 +65,27 @@ using namespace llvm;
 #define DEBUG_TYPE "mve-tail-predication"
 #define DESC "Transform predicated vector loops to use MVE tail predication"
 
-static cl::opt
-ForceTailPredication("force-mve-tail-predication", cl::Hidden, cl::init(false),
-                     cl::desc("Force MVE tail-predication even if it might be "
-                              "unsafe (e.g. possible overflow in loop "
-                              "counters)"));
+cl::opt EnableTailPredication(
+   "tail-predication", cl::desc("MVE tail-predication options"),
+   cl::init(TailPredication::Disabled),
+   cl::values(clEnumValN(TailPredication::Disabled, "disabled",
+                         "Don't tail-predicate loops"),
+              clEnumValN(TailPredication::EnabledNoReductions,
+                         "enabled-no-reductions",
+                         "Enable tail-predication, but not for reduction loops"),
+              clEnumValN(TailPredication::Enabled,
+                         "enabled",
+                         "Enable tail-predication, including reduction loops"),
+              clEnumValN(TailPredication::ForceEnabledNoReductions,
+                         "force-enabled-no-reductions",
+                         "Enable tail-predication, but not for reduction loops, "
+                         "and force this which might be unsafe"),
+              clEnumValN(TailPredication::ForceEnabled,
+                         "force-enabled",
+                         "Enable tail-predication, including reduction loops, "
+                         "and force this which might be unsafe")));
+
 
-cl::opt
-DisableTailPredication("disable-mve-tail-predication", cl::Hidden,
-                       cl::init(true),
-                       cl::desc("Disable MVE Tail Predication"));
 namespace {
 
 class MVETailPredication : public LoopPass {
@@ -146,7 +158,7 @@ static bool IsMasked(Instruction *I) {
 }
 
 bool MVETailPredication::runOnLoop(Loop *L, LPPassManager&) {
-  if (skipLoop(L) || DisableTailPredication)
+  if (skipLoop(L) || !EnableTailPredication)
     return false;
 
   MaskedInsts.clear();
@@ -346,6 +358,9 @@ static void Cleanup(SetVector &MaybeDead, Loop *L) {
 //    vector width.
 bool MVETailPredication::IsSafeActiveMask(IntrinsicInst *ActiveLaneMask,
     Value *TripCount, FixedVectorType *VecTy) {
+  bool ForceTailPredication =
+    EnableTailPredication == TailPredication::ForceEnabledNoReductions ||
+    EnableTailPredication == TailPredication::ForceEnabled;
   // 1) Test whether entry to the loop is protected by a conditional
   // BTC + 1 < 0. In other words, if the scalar trip count overflows,
   // becomes negative, we shouldn't enter the loop and creating
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll
index a00af0d6a9ec4..5fced6ad29e2a 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/basic-tail-pred.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s
 
 ; CHECK-LABEL: mul_v16i8
 ; CHECK-NOT: %num.elements = add i32 %trip.count.minus.1, 1
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll
index dab642b94be05..56343a6d65cb5 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/clear-maskedinsts.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve.fp -mve-tail-predication -disable-mve-tail-predication=false %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve.fp -mve-tail-predication -tail-predication=enabled %s -S -o - | FileCheck %s
 
 define hidden i32 @_Z4loopPiPjiS0_i(i32* noalias nocapture readonly %s1, i32* noalias nocapture readonly %s2, i32 %x, i32* noalias nocapture %d, i32 %n) {
 ; CHECK-LABEL: @_Z4loopPiPjiS0_i(
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
index bf6e92a1c8838..e98276e258abd 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/cond-vector-reduce-mve-codegen.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s
 
 define dso_local i32 @vpsel_mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32* noalias nocapture readonly %c, i32 %N) {
 ; CHECK-LABEL: vpsel_mul_reduce_add:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll
index 8d201a23a6898..1fda5c08a0375 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/extending-loads.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s
 
 define dso_local arm_aapcs_vfpcc void @sext_i8(i16* noalias nocapture %a, i8* nocapture readonly %b, i32 %N) {
 ; CHECK-LABEL: sext_i8:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
index 8c18159c24c56..d8d6af3b9a8dc 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/fast-fp-loops.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve.fp,+fp-armv8d16sp,+fp16,+fullfp16 -tail-predication=enabled %s -o - | FileCheck %s
 
 define arm_aapcs_vfpcc void @fast_float_mul(float* nocapture %a, float* nocapture readonly %b, float* nocapture readonly %c, i32 %N) {
 ; CHECK-LABEL: fast_float_mul:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll
index bc2c7e084ea7c..fddbfa8b66207 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/lsr-profitable-chain.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -O3 -disable-mve-tail-predication=false -mtriple=thumbv8.1m.main -mattr=+mve,+mve.fp %s -o - | FileCheck %s
+; RUN: llc -O3 -tail-predication=enabled -mtriple=thumbv8.1m.main -mattr=+mve,+mve.fp %s -o - | FileCheck %s
 
 target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m-arm-none-eabi"
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
index 6c1273db3f80f..428c703dd341e 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/mve-tail-data-types.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s
 
 define arm_aapcs_vfpcc i32 @test_acc_scalar_char(i8 zeroext %a, i8* nocapture readonly %b, i32 %N) {
 ; CHECK-LABEL: test_acc_scalar_char:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
index 64702cc3c3155..548ba396bed42 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/nested.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=armv8.1m.main -mattr=+mve -S -mve-tail-predication -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: opt -mtriple=armv8.1m.main -mattr=+mve -S -mve-tail-predication -tail-predication=enabled %s -o - | FileCheck %s
 
 define void @mat_vec_sext_i16(i16** nocapture readonly %A, i16* nocapture readonly %B, i32* noalias nocapture %C, i32 %N) {
 ; CHECK-LABEL: @mat_vec_sext_i16(
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
index 12c3ca0525f21..66601dd66cb29 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/reductions.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s
 
 define dso_local arm_aapcs_vfpcc zeroext i8 @one_loop_add_add_v16i8(i8* nocapture readonly %a, i8* nocapture readonly %b, i32 %N) local_unnamed_addr {
 ; CHECK-LABEL: one_loop_add_add_v16i8:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
index 13d750310a56c..065e534dd55bd 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-const.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s
 
 define dso_local void @foo(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i32* noalias nocapture readonly %C, i32* noalias nocapture readnone %D, i32 %N) local_unnamed_addr #0 {
 ; CHECK-LABEL: @foo(
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll
index d405657f4d17e..e9facfda61335 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-add-sat.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
+
 define arm_aapcs_vfpcc void @uadd_sat(i16* noalias nocapture readonly %pSrcA, i16* noalias nocapture readonly %pSrcB, i16* noalias nocapture %pDst, i32 %blockSize) {
 ; CHECK-LABEL: uadd_sat:
 ; CHECK:       @ %bb.0: @ %entry
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll
index d3247a3fd28e7..87f23adf7ffa5 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-fabs.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
+
 define arm_aapcs_vfpcc void @fabs(float* noalias nocapture readonly %pSrcA, float* noalias nocapture %pDst, i32 %blockSize) {
 ; CHECK-LABEL: fabs:
 ; CHECK:       @ %bb.0: @ %entry
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll
index 962e9df3dc1e9..e72e81da7e7c1 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-round.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
+
 define arm_aapcs_vfpcc void @round(float* noalias nocapture readonly %pSrcA, float* noalias nocapture %pDst, i32 %n) #0 {
 ; CHECK-LABEL: round:
 ; CHECK:       @ %bb.0: @ %entry
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll
index 790311a54aa1d..3c7ae4dc734ad 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-intrinsic-sub-sat.ll
@@ -1,5 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -disable-mve-tail-predication=false -o - %s | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -verify-machineinstrs -tail-predication=enabled -o - %s | FileCheck %s
+
 define arm_aapcs_vfpcc void @usub_sat(i16* noalias nocapture readonly %pSrcA, i16* noalias nocapture readonly %pSrcB, i16* noalias nocapture %pDst, i32 %blockSize) {
 ; CHECK-LABEL: usub_sat:
 ; CHECK:       @ %bb.0: @ %entry
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll
index 8c1534be77db0..52cd8fdc6d798 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-narrow.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s
 
 ; TODO: We should be able to generate a vctp for the loads.
 ; CHECK-LABEL: trunc_v4i32_v4i16
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll
index 1926bbeeaa70f..8e46e3385385e 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-pattern-fail.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s
 
 ; The following functions should all fail to become tail-predicated.
 ; CHECK-NOT: call i32 @llvm.arm.vctp
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll
index 3a9d3d1171266..b40b36ced4af2 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-pred-widen.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s
 
 ; CHECK-LABEL: expand_v8i16_v8i32
 ; CHECK-NOT: call i32 @llvm.arm.mve.vctp
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll
index 5c753134744d6..f3055bc8a575f 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/tail-reduce.ll
@@ -1,6 +1,6 @@
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false \
-; RUN:    -force-mve-tail-predication -mattr=+mve %s -S -o - | FileCheck %s --check-prefix=FORCE
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=force-enabled \
+; RUN:    -mattr=+mve %s -S -o - | FileCheck %s --check-prefix=FORCE
 
 ; CHECK-LABEL: reduction_i32
 ; CHECK: phi i32 [ 0, %vector.ph ]
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
index f1242db364851..4db17c074643f 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/varying-outer-2d-reduction.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main -mattr=+mve -tail-predication=enabled %s -o - | FileCheck %s
 
 define dso_local void @varying_outer_2d_reduction(i16* nocapture readonly %Input, i16* nocapture %Output, i16 signext %Size, i16 signext %N, i16 signext %Scale) local_unnamed_addr {
 ; CHECK-LABEL: varying_outer_2d_reduction:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
index 26a570ac4c29b..615334300c283 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-arith-codegen.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -disable-mve-tail-predication=false --verify-machineinstrs %s -o - | FileCheck %s
+; RUN: llc -mtriple=armv8.1m.main -mattr=+mve -tail-predication=enabled --verify-machineinstrs %s -o - | FileCheck %s
 
 define dso_local i32 @mul_reduce_add(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) {
 ; CHECK-LABEL: mul_reduce_add:
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
index aaeae75e072f7..e10cc3153b9c9 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-reduce-mve-tail.ll
@@ -1,5 +1,5 @@
 
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve %s -S -o - | FileCheck %s
 
 ; CHECK-LABEL: vec_mul_reduce_add
 
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll
index 19d9c89dabca8..f1a35af8b57ed 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/vector-unroll.ll
@@ -1,4 +1,4 @@
-; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -disable-mve-tail-predication=false -mattr=+mve,+lob %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main -mve-tail-predication -tail-predication=enabled -mattr=+mve,+lob %s -S -o - | FileCheck %s
 
 ; TODO: The unrolled pattern is preventing the transform
 ; CHECK-LABEL: mul_v16i8_unroll
diff --git a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll
index 0ba224415b67e..306f31be27f96 100644
--- a/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-fma-loops.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -disable-mve-tail-predication=false %s -o - | FileCheck %s
+; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -verify-machineinstrs -tail-predication=enabled %s -o - | FileCheck %s
 
 define arm_aapcs_vfpcc void @fmas1(float* nocapture readonly %x, float* nocapture readonly %y, float* noalias nocapture %z, float %a, i32 %n) {
 ; CHECK-LABEL: fmas1:
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
index ff3e03c7bad42..ac6bb56ff5f80 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/prefer-tail-loop-folding.ll
@@ -1,19 +1,19 @@
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize -S < %s | \
+; RUN:   -tail-predication=enabled -loop-vectorize -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,PREFER-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=-mve \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize \
+; RUN:   -tail-predication=enabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=true -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,NO-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize \
+; RUN:   -tail-predication=enabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=false -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,NO-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve \
-; RUN:   -disable-mve-tail-predication=true -loop-vectorize \
+; RUN:   -tail-predication=disabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=true -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,NO-FOLDING
 
@@ -21,24 +21,24 @@
 ; 'isHardwareLoopProfitable' return false, so that we test avoiding folding for
 ; these cases.
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve,-lob \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize \
+; RUN:   -tail-predication=enabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=true -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,NO-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize \
+; RUN:   -tail-predication=enabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=true -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,PREFER-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \
 ; RUN:   -prefer-predicate-over-epilog=false \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize \
+; RUN:   -tail-predication=enabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=true -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,NO-FOLDING
 
 ; RUN: opt -mtriple=thumbv8.1m.main-arm-eabihf -mattr=+mve.fp \
 ; RUN:   -prefer-predicate-over-epilog=true \
-; RUN:   -disable-mve-tail-predication=false -loop-vectorize \
+; RUN:   -tail-predication=enabled -loop-vectorize \
 ; RUN:   -enable-arm-maskedldst=true -S < %s | \
 ; RUN:   FileCheck %s -check-prefixes=CHECK,FOLDING-OPT
 
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll
index 23ecf5b6015cb..8a327f163b121 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-folding-counting-down.ll
@@ -1,6 +1,6 @@
 ; RUN: opt < %s -loop-vectorize -S | FileCheck %s --check-prefixes=COMMON,DEFAULT
-; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false  -prefer-predicate-over-epilog -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-PREFER
-; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-ENABLE-TP
+; RUN: opt < %s -loop-vectorize -tail-predication=enabled -prefer-predicate-over-epilog -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-PREFER
+; RUN: opt < %s -loop-vectorize -tail-predication=enabled -S | FileCheck %s --check-prefixes=COMMON,CHECK-TF,CHECK-ENABLE-TP
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"
 target triple = "thumbv8.1m.main-arm-unknown-eabihf"
diff --git a/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll b/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll
index f3e1af6f14ffc..eda3c115c0f6b 100644
--- a/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll
+++ b/llvm/test/Transforms/LoopVectorize/ARM/tail-loop-folding.ll
@@ -1,7 +1,7 @@
-; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false -S | \
+; RUN: opt < %s -loop-vectorize -tail-predication=enabled -S | \
 ; RUN:  FileCheck %s -check-prefixes=COMMON,CHECK
 
-; RUN: opt < %s -loop-vectorize -disable-mve-tail-predication=false -prefer-predicate-over-epilog -S | \
+; RUN: opt < %s -loop-vectorize -tail-predication=enabled -prefer-predicate-over-epilog -S | \
 ; RUN:   FileCheck -check-prefixes=COMMON,PREDFLAG %s
 
 target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64"

From 591a3af5c7acc05617c0eacf6ae4f76bd8a9a6ce Mon Sep 17 00:00:00 2001
From: Sanjay Patel 
Date: Mon, 13 Jul 2020 08:51:09 -0400
Subject: [PATCH 092/771] [DAGCombiner] allow load/store merging if pairs can
 be rotated into place

This carves out an exception for a pair of consecutive loads that are
reversed from the consecutive order of a pair of stores. All of the
existing profitability/legality checks for the memops remain between
the 2 altered hunks of code.

This should give us the same x86 base-case asm that gcc gets in
PR41098 and PR44895:
http://bugs.llvm.org/PR41098
http://bugs.llvm.org/PR44895

I think we are missing a potential subsequent conversion to use "movbe"
if the target supports that. That might be similar to what AArch64
would use to get "rev16".

Differential Revision:
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 39 +++++++++---
 .../CodeGen/AArch64/merge-store-dependency.ll | 22 ++++---
 llvm/test/CodeGen/X86/stores-merging.ll       | 61 ++++++++-----------
 3 files changed, 70 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 42e6e12f3f027..dd601bd5ca7e8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16541,14 +16541,27 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes,
   }
 
   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
-    // If we have load/store pair instructions and we only have two values,
-    // don't bother merging.
     Align RequiredAlignment;
-    if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
-        StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
-      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
-      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
-      break;
+    bool NeedRotate = false;
+    if (LoadNodes.size() == 2) {
+      // If we have load/store pair instructions and we only have two values,
+      // don't bother merging.
+      if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+          StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
+        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
+        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
+        break;
+      }
+      // If the loads are reversed, see if we can rotate the halves into place.
+      int64_t Offset0 = LoadNodes[0].OffsetFromBase;
+      int64_t Offset1 = LoadNodes[1].OffsetFromBase;
+      EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
+      if (Offset0 - Offset1 == ElementSizeBytes &&
+          (hasOperation(ISD::ROTL, PairVT) ||
+           hasOperation(ISD::ROTR, PairVT))) {
+        std::swap(LoadNodes[0], LoadNodes[1]);
+        NeedRotate = true;
+      }
     }
     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
@@ -16713,8 +16726,18 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes,
       NewLoad = DAG.getLoad(
           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
+      SDValue StoreOp = NewLoad;
+      if (NeedRotate) {
+        unsigned LoadWidth = ElementSizeBytes * 8 * 2;
+        assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
+               "Unexpected type for rotate-able load pair");
+        SDValue RotAmt =
+            DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
+        // Target can convert to the identical ROTR if it does not have ROTL.
+        StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
+      }
       NewStore = DAG.getStore(
-          NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+          NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
     } else { // This must be the truncstore/extload case
       EVT ExtendedTy =
diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
index 77b7012d2ed1e..6850846fec068 100644
--- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
+++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -95,6 +95,8 @@ exit:
   ret void
 }
 
+; TODO: rev16?
+
 define void @rotate16_in_place(i8* %p) {
 ; A53-LABEL: rotate16_in_place:
 ; A53:       // %bb.0:
@@ -112,6 +114,8 @@ define void @rotate16_in_place(i8* %p) {
   ret void
 }
 
+; TODO: rev16?
+
 define void @rotate16(i8* %p, i8* %q) {
 ; A53-LABEL: rotate16:
 ; A53:       // %bb.0:
@@ -134,10 +138,9 @@ define void @rotate16(i8* %p, i8* %q) {
 define void @rotate32_in_place(i16* %p) {
 ; A53-LABEL: rotate32_in_place:
 ; A53:       // %bb.0:
-; A53-NEXT:    ldrh w8, [x0, #2]
-; A53-NEXT:    ldrh w9, [x0]
-; A53-NEXT:    strh w8, [x0]
-; A53-NEXT:    strh w9, [x0, #2]
+; A53-NEXT:    ldr w8, [x0]
+; A53-NEXT:    ror w8, w8, #16
+; A53-NEXT:    str w8, [x0]
 ; A53-NEXT:    ret
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -151,10 +154,9 @@ define void @rotate32_in_place(i16* %p) {
 define void @rotate32(i16* %p) {
 ; A53-LABEL: rotate32:
 ; A53:       // %bb.0:
-; A53-NEXT:    ldrh w8, [x0, #2]
-; A53-NEXT:    ldrh w9, [x0]
-; A53-NEXT:    strh w8, [x0, #84]
-; A53-NEXT:    strh w9, [x0, #86]
+; A53-NEXT:    ldr w8, [x0]
+; A53-NEXT:    ror w8, w8, #16
+; A53-NEXT:    str w8, [x0, #84]
 ; A53-NEXT:    ret
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -167,6 +169,8 @@ define void @rotate32(i16* %p) {
   ret void
 }
 
+; Prefer paired memops over rotate.
+
 define void @rotate64_in_place(i32* %p) {
 ; A53-LABEL: rotate64_in_place:
 ; A53:       // %bb.0:
@@ -182,6 +186,8 @@ define void @rotate64_in_place(i32* %p) {
   ret void
 }
 
+; Prefer paired memops over rotate.
+
 define void @rotate64(i32* %p) {
 ; A53-LABEL: rotate64:
 ; A53:       // %bb.0:
diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll
index 768684067f32f..60fd01eac0960 100644
--- a/llvm/test/CodeGen/X86/stores-merging.ll
+++ b/llvm/test/CodeGen/X86/stores-merging.ll
@@ -246,10 +246,7 @@ define void @pr43446_1(i8* %a) {
 define void @rotate16_in_place(i8* %p) {
 ; CHECK-LABEL: rotate16_in_place:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movb (%rdi), %al
-; CHECK-NEXT:    movb 1(%rdi), %cl
-; CHECK-NEXT:    movb %cl, (%rdi)
-; CHECK-NEXT:    movb %al, 1(%rdi)
+; CHECK-NEXT:    rolw $8, (%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i8, i8* %p, i64 0
   %p1 = getelementptr i8, i8* %p, i64 1
@@ -263,10 +260,9 @@ define void @rotate16_in_place(i8* %p) {
 define void @rotate16(i8* %p, i8* %q) {
 ; CHECK-LABEL: rotate16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movb (%rdi), %al
-; CHECK-NEXT:    movb 1(%rdi), %cl
-; CHECK-NEXT:    movb %cl, (%rsi)
-; CHECK-NEXT:    movb %al, 1(%rsi)
+; CHECK-NEXT:    movzwl (%rdi), %eax
+; CHECK-NEXT:    rolw $8, %ax
+; CHECK-NEXT:    movw %ax, (%rsi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i8, i8* %p, i64 0
   %p1 = getelementptr i8, i8* %p, i64 1
@@ -282,10 +278,7 @@ define void @rotate16(i8* %p, i8* %q) {
 define void @rotate32_in_place(i16* %p) {
 ; CHECK-LABEL: rotate32_in_place:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzwl (%rdi), %eax
-; CHECK-NEXT:    movzwl 2(%rdi), %ecx
-; CHECK-NEXT:    movw %cx, (%rdi)
-; CHECK-NEXT:    movw %ax, 2(%rdi)
+; CHECK-NEXT:    roll $16, (%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -299,10 +292,9 @@ define void @rotate32_in_place(i16* %p) {
 define void @rotate32(i16* %p) {
 ; CHECK-LABEL: rotate32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzwl (%rdi), %eax
-; CHECK-NEXT:    movzwl 2(%rdi), %ecx
-; CHECK-NEXT:    movw %cx, 84(%rdi)
-; CHECK-NEXT:    movw %ax, 86(%rdi)
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    roll $16, %eax
+; CHECK-NEXT:    movl %eax, 84(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -318,10 +310,7 @@ define void @rotate32(i16* %p) {
 define void @rotate64_in_place(i32* %p) {
 ; CHECK-LABEL: rotate64_in_place:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    movl 4(%rdi), %ecx
-; CHECK-NEXT:    movl %ecx, (%rdi)
-; CHECK-NEXT:    movl %eax, 4(%rdi)
+; CHECK-NEXT:    rolq $32, (%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i32, i32* %p, i64 0
   %p1 = getelementptr i32, i32* %p, i64 1
@@ -335,10 +324,9 @@ define void @rotate64_in_place(i32* %p) {
 define void @rotate64(i32* %p) {
 ; CHECK-LABEL: rotate64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    movl 4(%rdi), %ecx
-; CHECK-NEXT:    movl %ecx, 8(%rdi)
-; CHECK-NEXT:    movl %eax, 12(%rdi)
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    rolq $32, %rax
+; CHECK-NEXT:    movq %rax, 8(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i32, i32* %p, i64 0
   %p1 = getelementptr i32, i32* %p, i64 1
@@ -354,10 +342,9 @@ define void @rotate64(i32* %p) {
 define void @rotate64_iterate(i16* %p) {
 ; CHECK-LABEL: rotate64_iterate:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    movl 4(%rdi), %ecx
-; CHECK-NEXT:    movl %ecx, 84(%rdi)
-; CHECK-NEXT:    movl %eax, 88(%rdi)
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    rolq $32, %rax
+; CHECK-NEXT:    movq %rax, 84(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -378,6 +365,8 @@ define void @rotate64_iterate(i16* %p) {
   ret void
 }
 
+; TODO: recognize this as 2 rotates?
+
 define void @rotate32_consecutive(i16* %p) {
 ; CHECK-LABEL: rotate32_consecutive:
 ; CHECK:       # %bb.0:
@@ -409,17 +398,17 @@ define void @rotate32_consecutive(i16* %p) {
   ret void
 }
 
+; Same as above, but now the stores are not all consecutive.
+
 define void @rotate32_twice(i16* %p) {
 ; CHECK-LABEL: rotate32_twice:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzwl (%rdi), %eax
-; CHECK-NEXT:    movzwl 2(%rdi), %ecx
-; CHECK-NEXT:    movzwl 4(%rdi), %edx
-; CHECK-NEXT:    movzwl 6(%rdi), %esi
-; CHECK-NEXT:    movw %cx, 84(%rdi)
-; CHECK-NEXT:    movw %ax, 86(%rdi)
-; CHECK-NEXT:    movw %si, 108(%rdi)
-; CHECK-NEXT:    movw %dx, 110(%rdi)
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    movl 4(%rdi), %ecx
+; CHECK-NEXT:    roll $16, %eax
+; CHECK-NEXT:    roll $16, %ecx
+; CHECK-NEXT:    movl %eax, 84(%rdi)
+; CHECK-NEXT:    movl %ecx, 108(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1

From f1bbf3acb42a7447c170b8248e310d8a61443377 Mon Sep 17 00:00:00 2001
From: Sanjay Patel 
Date: Mon, 13 Jul 2020 08:55:29 -0400
Subject: [PATCH 093/771] Revert "[DAGCombiner] allow load/store merging if
 pairs can be rotated into place"

This reverts commit 591a3af5c7acc05617c0eacf6ae4f76bd8a9a6ce.
The commit message was cut off and failed to include the review citation.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 39 +++---------
 .../CodeGen/AArch64/merge-store-dependency.ll | 22 +++----
 llvm/test/CodeGen/X86/stores-merging.ll       | 61 +++++++++++--------
 3 files changed, 52 insertions(+), 70 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dd601bd5ca7e8..42e6e12f3f027 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16541,27 +16541,14 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes,
   }
 
   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
+    // If we have load/store pair instructions and we only have two values,
+    // don't bother merging.
     Align RequiredAlignment;
-    bool NeedRotate = false;
-    if (LoadNodes.size() == 2) {
-      // If we have load/store pair instructions and we only have two values,
-      // don't bother merging.
-      if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
-          StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
-        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
-        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
-        break;
-      }
-      // If the loads are reversed, see if we can rotate the halves into place.
-      int64_t Offset0 = LoadNodes[0].OffsetFromBase;
-      int64_t Offset1 = LoadNodes[1].OffsetFromBase;
-      EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
-      if (Offset0 - Offset1 == ElementSizeBytes &&
-          (hasOperation(ISD::ROTL, PairVT) ||
-           hasOperation(ISD::ROTR, PairVT))) {
-        std::swap(LoadNodes[0], LoadNodes[1]);
-        NeedRotate = true;
-      }
+    if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+        StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
+      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
+      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
+      break;
     }
     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
@@ -16726,18 +16713,8 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes,
       NewLoad = DAG.getLoad(
           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
-      SDValue StoreOp = NewLoad;
-      if (NeedRotate) {
-        unsigned LoadWidth = ElementSizeBytes * 8 * 2;
-        assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
-               "Unexpected type for rotate-able load pair");
-        SDValue RotAmt =
-            DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
-        // Target can convert to the identical ROTR if it does not have ROTL.
-        StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
-      }
       NewStore = DAG.getStore(
-          NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
+          NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
     } else { // This must be the truncstore/extload case
       EVT ExtendedTy =
diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
index 6850846fec068..77b7012d2ed1e 100644
--- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
+++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -95,8 +95,6 @@ exit:
   ret void
 }
 
-; TODO: rev16?
-
 define void @rotate16_in_place(i8* %p) {
 ; A53-LABEL: rotate16_in_place:
 ; A53:       // %bb.0:
@@ -114,8 +112,6 @@ define void @rotate16_in_place(i8* %p) {
   ret void
 }
 
-; TODO: rev16?
-
 define void @rotate16(i8* %p, i8* %q) {
 ; A53-LABEL: rotate16:
 ; A53:       // %bb.0:
@@ -138,9 +134,10 @@ define void @rotate16(i8* %p, i8* %q) {
 define void @rotate32_in_place(i16* %p) {
 ; A53-LABEL: rotate32_in_place:
 ; A53:       // %bb.0:
-; A53-NEXT:    ldr w8, [x0]
-; A53-NEXT:    ror w8, w8, #16
-; A53-NEXT:    str w8, [x0]
+; A53-NEXT:    ldrh w8, [x0, #2]
+; A53-NEXT:    ldrh w9, [x0]
+; A53-NEXT:    strh w8, [x0]
+; A53-NEXT:    strh w9, [x0, #2]
 ; A53-NEXT:    ret
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -154,9 +151,10 @@ define void @rotate32_in_place(i16* %p) {
 define void @rotate32(i16* %p) {
 ; A53-LABEL: rotate32:
 ; A53:       // %bb.0:
-; A53-NEXT:    ldr w8, [x0]
-; A53-NEXT:    ror w8, w8, #16
-; A53-NEXT:    str w8, [x0, #84]
+; A53-NEXT:    ldrh w8, [x0, #2]
+; A53-NEXT:    ldrh w9, [x0]
+; A53-NEXT:    strh w8, [x0, #84]
+; A53-NEXT:    strh w9, [x0, #86]
 ; A53-NEXT:    ret
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -169,8 +167,6 @@ define void @rotate32(i16* %p) {
   ret void
 }
 
-; Prefer paired memops over rotate.
-
 define void @rotate64_in_place(i32* %p) {
 ; A53-LABEL: rotate64_in_place:
 ; A53:       // %bb.0:
@@ -186,8 +182,6 @@ define void @rotate64_in_place(i32* %p) {
   ret void
 }
 
-; Prefer paired memops over rotate.
-
 define void @rotate64(i32* %p) {
 ; A53-LABEL: rotate64:
 ; A53:       // %bb.0:
diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll
index 60fd01eac0960..768684067f32f 100644
--- a/llvm/test/CodeGen/X86/stores-merging.ll
+++ b/llvm/test/CodeGen/X86/stores-merging.ll
@@ -246,7 +246,10 @@ define void @pr43446_1(i8* %a) {
 define void @rotate16_in_place(i8* %p) {
 ; CHECK-LABEL: rotate16_in_place:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    rolw $8, (%rdi)
+; CHECK-NEXT:    movb (%rdi), %al
+; CHECK-NEXT:    movb 1(%rdi), %cl
+; CHECK-NEXT:    movb %cl, (%rdi)
+; CHECK-NEXT:    movb %al, 1(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i8, i8* %p, i64 0
   %p1 = getelementptr i8, i8* %p, i64 1
@@ -260,9 +263,10 @@ define void @rotate16_in_place(i8* %p) {
 define void @rotate16(i8* %p, i8* %q) {
 ; CHECK-LABEL: rotate16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzwl (%rdi), %eax
-; CHECK-NEXT:    rolw $8, %ax
-; CHECK-NEXT:    movw %ax, (%rsi)
+; CHECK-NEXT:    movb (%rdi), %al
+; CHECK-NEXT:    movb 1(%rdi), %cl
+; CHECK-NEXT:    movb %cl, (%rsi)
+; CHECK-NEXT:    movb %al, 1(%rsi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i8, i8* %p, i64 0
   %p1 = getelementptr i8, i8* %p, i64 1
@@ -278,7 +282,10 @@ define void @rotate16(i8* %p, i8* %q) {
 define void @rotate32_in_place(i16* %p) {
 ; CHECK-LABEL: rotate32_in_place:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    roll $16, (%rdi)
+; CHECK-NEXT:    movzwl (%rdi), %eax
+; CHECK-NEXT:    movzwl 2(%rdi), %ecx
+; CHECK-NEXT:    movw %cx, (%rdi)
+; CHECK-NEXT:    movw %ax, 2(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -292,9 +299,10 @@ define void @rotate32_in_place(i16* %p) {
 define void @rotate32(i16* %p) {
 ; CHECK-LABEL: rotate32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    roll $16, %eax
-; CHECK-NEXT:    movl %eax, 84(%rdi)
+; CHECK-NEXT:    movzwl (%rdi), %eax
+; CHECK-NEXT:    movzwl 2(%rdi), %ecx
+; CHECK-NEXT:    movw %cx, 84(%rdi)
+; CHECK-NEXT:    movw %ax, 86(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -310,7 +318,10 @@ define void @rotate32(i16* %p) {
 define void @rotate64_in_place(i32* %p) {
 ; CHECK-LABEL: rotate64_in_place:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    rolq $32, (%rdi)
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    movl 4(%rdi), %ecx
+; CHECK-NEXT:    movl %ecx, (%rdi)
+; CHECK-NEXT:    movl %eax, 4(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i32, i32* %p, i64 0
   %p1 = getelementptr i32, i32* %p, i64 1
@@ -324,9 +335,10 @@ define void @rotate64_in_place(i32* %p) {
 define void @rotate64(i32* %p) {
 ; CHECK-LABEL: rotate64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    rolq $32, %rax
-; CHECK-NEXT:    movq %rax, 8(%rdi)
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    movl 4(%rdi), %ecx
+; CHECK-NEXT:    movl %ecx, 8(%rdi)
+; CHECK-NEXT:    movl %eax, 12(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i32, i32* %p, i64 0
   %p1 = getelementptr i32, i32* %p, i64 1
@@ -342,9 +354,10 @@ define void @rotate64(i32* %p) {
 define void @rotate64_iterate(i16* %p) {
 ; CHECK-LABEL: rotate64_iterate:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    rolq $32, %rax
-; CHECK-NEXT:    movq %rax, 84(%rdi)
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    movl 4(%rdi), %ecx
+; CHECK-NEXT:    movl %ecx, 84(%rdi)
+; CHECK-NEXT:    movl %eax, 88(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -365,8 +378,6 @@ define void @rotate64_iterate(i16* %p) {
   ret void
 }
 
-; TODO: recognize this as 2 rotates?
-
 define void @rotate32_consecutive(i16* %p) {
 ; CHECK-LABEL: rotate32_consecutive:
 ; CHECK:       # %bb.0:
@@ -398,17 +409,17 @@ define void @rotate32_consecutive(i16* %p) {
   ret void
 }
 
-; Same as above, but now the stores are not all consecutive.
-
 define void @rotate32_twice(i16* %p) {
 ; CHECK-LABEL: rotate32_twice:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    movl 4(%rdi), %ecx
-; CHECK-NEXT:    roll $16, %eax
-; CHECK-NEXT:    roll $16, %ecx
-; CHECK-NEXT:    movl %eax, 84(%rdi)
-; CHECK-NEXT:    movl %ecx, 108(%rdi)
+; CHECK-NEXT:    movzwl (%rdi), %eax
+; CHECK-NEXT:    movzwl 2(%rdi), %ecx
+; CHECK-NEXT:    movzwl 4(%rdi), %edx
+; CHECK-NEXT:    movzwl 6(%rdi), %esi
+; CHECK-NEXT:    movw %cx, 84(%rdi)
+; CHECK-NEXT:    movw %ax, 86(%rdi)
+; CHECK-NEXT:    movw %si, 108(%rdi)
+; CHECK-NEXT:    movw %dx, 110(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1

From 2df46a574387663717a352eebad017979d3b5ef4 Mon Sep 17 00:00:00 2001
From: Sanjay Patel 
Date: Mon, 13 Jul 2020 08:57:00 -0400
Subject: [PATCH 094/771] [DAGCombiner] allow load/store merging if pairs can
 be rotated into place

This carves out an exception for a pair of consecutive loads that are
reversed from the consecutive order of a pair of stores. All of the
existing profitability/legality checks for the memops remain between
the 2 altered hunks of code.

This should give us the same x86 base-case asm that gcc gets in
PR41098 and PR44895:
http://bugs.llvm.org/PR41098
http://bugs.llvm.org/PR44895

I think we are missing a potential subsequent conversion to use "movbe"
if the target supports that. That might be similar to what AArch64
would use to get "rev16".

Differential Revision: https://reviews.llvm.org/D83567
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 39 +++++++++---
 .../CodeGen/AArch64/merge-store-dependency.ll | 22 ++++---
 llvm/test/CodeGen/X86/stores-merging.ll       | 61 ++++++++-----------
 3 files changed, 70 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 42e6e12f3f027..dd601bd5ca7e8 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -16541,14 +16541,27 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes,
   }
 
   while (NumConsecutiveStores >= 2 && LoadNodes.size() >= 2) {
-    // If we have load/store pair instructions and we only have two values,
-    // don't bother merging.
     Align RequiredAlignment;
-    if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
-        StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
-      StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
-      LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
-      break;
+    bool NeedRotate = false;
+    if (LoadNodes.size() == 2) {
+      // If we have load/store pair instructions and we only have two values,
+      // don't bother merging.
+      if (TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
+          StoreNodes[0].MemNode->getAlign() >= RequiredAlignment) {
+        StoreNodes.erase(StoreNodes.begin(), StoreNodes.begin() + 2);
+        LoadNodes.erase(LoadNodes.begin(), LoadNodes.begin() + 2);
+        break;
+      }
+      // If the loads are reversed, see if we can rotate the halves into place.
+      int64_t Offset0 = LoadNodes[0].OffsetFromBase;
+      int64_t Offset1 = LoadNodes[1].OffsetFromBase;
+      EVT PairVT = EVT::getIntegerVT(Context, ElementSizeBytes * 8 * 2);
+      if (Offset0 - Offset1 == ElementSizeBytes &&
+          (hasOperation(ISD::ROTL, PairVT) ||
+           hasOperation(ISD::ROTR, PairVT))) {
+        std::swap(LoadNodes[0], LoadNodes[1]);
+        NeedRotate = true;
+      }
     }
     LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
     unsigned FirstStoreAS = FirstInChain->getAddressSpace();
@@ -16713,8 +16726,18 @@ bool DAGCombiner::tryStoreMergeOfLoads(SmallVectorImpl &StoreNodes,
       NewLoad = DAG.getLoad(
           JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
           FirstLoad->getPointerInfo(), FirstLoadAlign, LdMMOFlags);
+      SDValue StoreOp = NewLoad;
+      if (NeedRotate) {
+        unsigned LoadWidth = ElementSizeBytes * 8 * 2;
+        assert(JointMemOpVT == EVT::getIntegerVT(Context, LoadWidth) &&
+               "Unexpected type for rotate-able load pair");
+        SDValue RotAmt =
+            DAG.getShiftAmountConstant(LoadWidth / 2, JointMemOpVT, LoadDL);
+        // Target can convert to the identical ROTR if it does not have ROTL.
+        StoreOp = DAG.getNode(ISD::ROTL, LoadDL, JointMemOpVT, NewLoad, RotAmt);
+      }
       NewStore = DAG.getStore(
-          NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
+          NewStoreChain, StoreDL, StoreOp, FirstInChain->getBasePtr(),
           FirstInChain->getPointerInfo(), FirstStoreAlign, StMMOFlags);
     } else { // This must be the truncstore/extload case
       EVT ExtendedTy =
diff --git a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
index 77b7012d2ed1e..6850846fec068 100644
--- a/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
+++ b/llvm/test/CodeGen/AArch64/merge-store-dependency.ll
@@ -95,6 +95,8 @@ exit:
   ret void
 }
 
+; TODO: rev16?
+
 define void @rotate16_in_place(i8* %p) {
 ; A53-LABEL: rotate16_in_place:
 ; A53:       // %bb.0:
@@ -112,6 +114,8 @@ define void @rotate16_in_place(i8* %p) {
   ret void
 }
 
+; TODO: rev16?
+
 define void @rotate16(i8* %p, i8* %q) {
 ; A53-LABEL: rotate16:
 ; A53:       // %bb.0:
@@ -134,10 +138,9 @@ define void @rotate16(i8* %p, i8* %q) {
 define void @rotate32_in_place(i16* %p) {
 ; A53-LABEL: rotate32_in_place:
 ; A53:       // %bb.0:
-; A53-NEXT:    ldrh w8, [x0, #2]
-; A53-NEXT:    ldrh w9, [x0]
-; A53-NEXT:    strh w8, [x0]
-; A53-NEXT:    strh w9, [x0, #2]
+; A53-NEXT:    ldr w8, [x0]
+; A53-NEXT:    ror w8, w8, #16
+; A53-NEXT:    str w8, [x0]
 ; A53-NEXT:    ret
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -151,10 +154,9 @@ define void @rotate32_in_place(i16* %p) {
 define void @rotate32(i16* %p) {
 ; A53-LABEL: rotate32:
 ; A53:       // %bb.0:
-; A53-NEXT:    ldrh w8, [x0, #2]
-; A53-NEXT:    ldrh w9, [x0]
-; A53-NEXT:    strh w8, [x0, #84]
-; A53-NEXT:    strh w9, [x0, #86]
+; A53-NEXT:    ldr w8, [x0]
+; A53-NEXT:    ror w8, w8, #16
+; A53-NEXT:    str w8, [x0, #84]
 ; A53-NEXT:    ret
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -167,6 +169,8 @@ define void @rotate32(i16* %p) {
   ret void
 }
 
+; Prefer paired memops over rotate.
+
 define void @rotate64_in_place(i32* %p) {
 ; A53-LABEL: rotate64_in_place:
 ; A53:       // %bb.0:
@@ -182,6 +186,8 @@ define void @rotate64_in_place(i32* %p) {
   ret void
 }
 
+; Prefer paired memops over rotate.
+
 define void @rotate64(i32* %p) {
 ; A53-LABEL: rotate64:
 ; A53:       // %bb.0:
diff --git a/llvm/test/CodeGen/X86/stores-merging.ll b/llvm/test/CodeGen/X86/stores-merging.ll
index 768684067f32f..60fd01eac0960 100644
--- a/llvm/test/CodeGen/X86/stores-merging.ll
+++ b/llvm/test/CodeGen/X86/stores-merging.ll
@@ -246,10 +246,7 @@ define void @pr43446_1(i8* %a) {
 define void @rotate16_in_place(i8* %p) {
 ; CHECK-LABEL: rotate16_in_place:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movb (%rdi), %al
-; CHECK-NEXT:    movb 1(%rdi), %cl
-; CHECK-NEXT:    movb %cl, (%rdi)
-; CHECK-NEXT:    movb %al, 1(%rdi)
+; CHECK-NEXT:    rolw $8, (%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i8, i8* %p, i64 0
   %p1 = getelementptr i8, i8* %p, i64 1
@@ -263,10 +260,9 @@ define void @rotate16_in_place(i8* %p) {
 define void @rotate16(i8* %p, i8* %q) {
 ; CHECK-LABEL: rotate16:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movb (%rdi), %al
-; CHECK-NEXT:    movb 1(%rdi), %cl
-; CHECK-NEXT:    movb %cl, (%rsi)
-; CHECK-NEXT:    movb %al, 1(%rsi)
+; CHECK-NEXT:    movzwl (%rdi), %eax
+; CHECK-NEXT:    rolw $8, %ax
+; CHECK-NEXT:    movw %ax, (%rsi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i8, i8* %p, i64 0
   %p1 = getelementptr i8, i8* %p, i64 1
@@ -282,10 +278,7 @@ define void @rotate16(i8* %p, i8* %q) {
 define void @rotate32_in_place(i16* %p) {
 ; CHECK-LABEL: rotate32_in_place:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzwl (%rdi), %eax
-; CHECK-NEXT:    movzwl 2(%rdi), %ecx
-; CHECK-NEXT:    movw %cx, (%rdi)
-; CHECK-NEXT:    movw %ax, 2(%rdi)
+; CHECK-NEXT:    roll $16, (%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -299,10 +292,9 @@ define void @rotate32_in_place(i16* %p) {
 define void @rotate32(i16* %p) {
 ; CHECK-LABEL: rotate32:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzwl (%rdi), %eax
-; CHECK-NEXT:    movzwl 2(%rdi), %ecx
-; CHECK-NEXT:    movw %cx, 84(%rdi)
-; CHECK-NEXT:    movw %ax, 86(%rdi)
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    roll $16, %eax
+; CHECK-NEXT:    movl %eax, 84(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -318,10 +310,7 @@ define void @rotate32(i16* %p) {
 define void @rotate64_in_place(i32* %p) {
 ; CHECK-LABEL: rotate64_in_place:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    movl 4(%rdi), %ecx
-; CHECK-NEXT:    movl %ecx, (%rdi)
-; CHECK-NEXT:    movl %eax, 4(%rdi)
+; CHECK-NEXT:    rolq $32, (%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i32, i32* %p, i64 0
   %p1 = getelementptr i32, i32* %p, i64 1
@@ -335,10 +324,9 @@ define void @rotate64_in_place(i32* %p) {
 define void @rotate64(i32* %p) {
 ; CHECK-LABEL: rotate64:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    movl 4(%rdi), %ecx
-; CHECK-NEXT:    movl %ecx, 8(%rdi)
-; CHECK-NEXT:    movl %eax, 12(%rdi)
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    rolq $32, %rax
+; CHECK-NEXT:    movq %rax, 8(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i32, i32* %p, i64 0
   %p1 = getelementptr i32, i32* %p, i64 1
@@ -354,10 +342,9 @@ define void @rotate64(i32* %p) {
 define void @rotate64_iterate(i16* %p) {
 ; CHECK-LABEL: rotate64_iterate:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movl (%rdi), %eax
-; CHECK-NEXT:    movl 4(%rdi), %ecx
-; CHECK-NEXT:    movl %ecx, 84(%rdi)
-; CHECK-NEXT:    movl %eax, 88(%rdi)
+; CHECK-NEXT:    movq (%rdi), %rax
+; CHECK-NEXT:    rolq $32, %rax
+; CHECK-NEXT:    movq %rax, 84(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1
@@ -378,6 +365,8 @@ define void @rotate64_iterate(i16* %p) {
   ret void
 }
 
+; TODO: recognize this as 2 rotates?
+
 define void @rotate32_consecutive(i16* %p) {
 ; CHECK-LABEL: rotate32_consecutive:
 ; CHECK:       # %bb.0:
@@ -409,17 +398,17 @@ define void @rotate32_consecutive(i16* %p) {
   ret void
 }
 
+; Same as above, but now the stores are not all consecutive.
+
 define void @rotate32_twice(i16* %p) {
 ; CHECK-LABEL: rotate32_twice:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    movzwl (%rdi), %eax
-; CHECK-NEXT:    movzwl 2(%rdi), %ecx
-; CHECK-NEXT:    movzwl 4(%rdi), %edx
-; CHECK-NEXT:    movzwl 6(%rdi), %esi
-; CHECK-NEXT:    movw %cx, 84(%rdi)
-; CHECK-NEXT:    movw %ax, 86(%rdi)
-; CHECK-NEXT:    movw %si, 108(%rdi)
-; CHECK-NEXT:    movw %dx, 110(%rdi)
+; CHECK-NEXT:    movl (%rdi), %eax
+; CHECK-NEXT:    movl 4(%rdi), %ecx
+; CHECK-NEXT:    roll $16, %eax
+; CHECK-NEXT:    roll $16, %ecx
+; CHECK-NEXT:    movl %eax, 84(%rdi)
+; CHECK-NEXT:    movl %ecx, 108(%rdi)
 ; CHECK-NEXT:    retq
   %p0 = getelementptr i16, i16* %p, i64 0
   %p1 = getelementptr i16, i16* %p, i64 1

From d7d1af39168ce8afd041f3ae8db1d1fd3d4f70ac Mon Sep 17 00:00:00 2001
From: Kirill Bobyrev 
Date: Mon, 13 Jul 2020 15:02:47 +0200
Subject: [PATCH 095/771] [clangd] Fix DocumentSymbol ranges

Summary:
DocumentSymbol ranges were not previously tested and, as a result, had invalid
end location. This patch addresses the issue.

Reviewers: sammccall

Reviewed By: sammccall

Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, usaxena95, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D83668
---
 clang-tools-extra/clangd/FindSymbols.cpp      | 16 ++---
 .../clangd/unittests/FindSymbolsTests.cpp     | 68 ++++++++++++++++++-
 2 files changed, 72 insertions(+), 12 deletions(-)

diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp
index 58e2ee1e21c77..f5d6a95aa713d 100644
--- a/clang-tools-extra/clangd/FindSymbols.cpp
+++ b/clang-tools-extra/clangd/FindSymbols.cpp
@@ -136,17 +136,11 @@ llvm::Optional declToSym(ASTContext &Ctx, const NamedDecl &ND) {
   auto &SM = Ctx.getSourceManager();
 
   SourceLocation NameLoc = nameLocation(ND, SM);
-  // getFileLoc is a good choice for us, but we also need to make sure
-  // sourceLocToPosition won't switch files, so we call getSpellingLoc on top of
-  // that to make sure it does not switch files.
-  // FIXME: sourceLocToPosition should not switch files!
   SourceLocation BeginLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getBeginLoc()));
   SourceLocation EndLoc = SM.getSpellingLoc(SM.getFileLoc(ND.getEndLoc()));
-  if (NameLoc.isInvalid() || BeginLoc.isInvalid() || EndLoc.isInvalid())
-    return llvm::None;
-
-  if (!SM.isWrittenInMainFile(NameLoc) || !SM.isWrittenInMainFile(BeginLoc) ||
-      !SM.isWrittenInMainFile(EndLoc))
+  const auto SymbolRange =
+      toHalfOpenFileRange(SM, Ctx.getLangOpts(), {BeginLoc, EndLoc});
+  if (!SymbolRange)
     return llvm::None;
 
   Position NameBegin = sourceLocToPosition(SM, NameLoc);
@@ -162,8 +156,8 @@ llvm::Optional declToSym(ASTContext &Ctx, const NamedDecl &ND) {
   SI.name = printName(Ctx, ND);
   SI.kind = SK;
   SI.deprecated = ND.isDeprecated();
-  SI.range =
-      Range{sourceLocToPosition(SM, BeginLoc), sourceLocToPosition(SM, EndLoc)};
+  SI.range = Range{sourceLocToPosition(SM, SymbolRange->getBegin()),
+                   sourceLocToPosition(SM, SymbolRange->getEnd())};
   SI.selectionRange = Range{NameBegin, NameEnd};
   if (!SI.range.contains(SI.selectionRange)) {
     // 'selectionRange' must be contained in 'range', so in cases where clang
diff --git a/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp b/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp
index 31879e356ce0e..07c42fcf20304 100644
--- a/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp
+++ b/clang-tools-extra/clangd/unittests/FindSymbolsTests.cpp
@@ -35,7 +35,7 @@ MATCHER_P(QName, Name, "") {
 }
 MATCHER_P(WithName, N, "") { return arg.name == N; }
 MATCHER_P(WithKind, Kind, "") { return arg.kind == Kind; }
-MATCHER_P(SymRange, Range, "") { return arg.location.range == Range; }
+MATCHER_P(SymRange, Range, "") { return arg.range == Range; }
 
 // GMock helpers for matching DocumentSymbol.
 MATCHER_P(SymNameRange, Range, "") { return arg.selectionRange == Range; }
@@ -712,6 +712,72 @@ TEST(DocumentSymbols, QualifiersWithTemplateArgs) {
                            WithName("Foo_type::method3")));
 }
 
+TEST(DocumentSymbolsTest, Ranges) {
+  TestTU TU;
+  Annotations Main(R"(
+      $foo[[int foo(bool Argument) {
+        return 42;
+      }]]
+
+      $variable[[char GLOBAL_VARIABLE]];
+
+      $ns[[namespace ns {
+      $bar[[class Bar {
+      public:
+        $ctor[[Bar() {}]]
+        $dtor[[~Bar()]];
+
+      private:
+        $field[[unsigned Baz]];
+
+        $getbaz[[unsigned getBaz() { return Baz; }]]
+      }]];
+      }]] // namespace ns
+
+      $forwardclass[[class ForwardClassDecl]];
+
+      $struct[[struct StructDefinition {
+        $structfield[[int *Pointer = nullptr]];
+      }]];
+      $forwardstruct[[struct StructDeclaration]];
+
+      $forwardfunc[[void forwardFunctionDecl(int Something)]];
+    )");
+  TU.Code = Main.code().str();
+  EXPECT_THAT(
+      getSymbols(TU.build()),
+      UnorderedElementsAre(
+          AllOf(WithName("foo"), WithKind(SymbolKind::Function),
+                SymRange(Main.range("foo"))),
+          AllOf(WithName("GLOBAL_VARIABLE"), WithKind(SymbolKind::Variable),
+                SymRange(Main.range("variable"))),
+          AllOf(
+              WithName("ns"), WithKind(SymbolKind::Namespace),
+              SymRange(Main.range("ns")),
+              Children(AllOf(
+                  WithName("Bar"), WithKind(SymbolKind::Class),
+                  SymRange(Main.range("bar")),
+                  Children(
+                      AllOf(WithName("Bar"), WithKind(SymbolKind::Constructor),
+                            SymRange(Main.range("ctor"))),
+                      AllOf(WithName("~Bar"), WithKind(SymbolKind::Constructor),
+                            SymRange(Main.range("dtor"))),
+                      AllOf(WithName("Baz"), WithKind(SymbolKind::Field),
+                            SymRange(Main.range("field"))),
+                      AllOf(WithName("getBaz"), WithKind(SymbolKind::Method),
+                            SymRange(Main.range("getbaz"))))))),
+          AllOf(WithName("ForwardClassDecl"), WithKind(SymbolKind::Class),
+                SymRange(Main.range("forwardclass"))),
+          AllOf(WithName("StructDefinition"), WithKind(SymbolKind::Struct),
+                SymRange(Main.range("struct")),
+                Children(AllOf(WithName("Pointer"), WithKind(SymbolKind::Field),
+                               SymRange(Main.range("structfield"))))),
+          AllOf(WithName("StructDeclaration"), WithKind(SymbolKind::Struct),
+                SymRange(Main.range("forwardstruct"))),
+          AllOf(WithName("forwardFunctionDecl"), WithKind(SymbolKind::Function),
+                SymRange(Main.range("forwardfunc")))));
+}
+
 } // namespace
 } // namespace clangd
 } // namespace clang

From f3b3689c043f49ad42e9d3f5057bc8f1a9f56d09 Mon Sep 17 00:00:00 2001
From: Raphael Isemann 
Date: Mon, 13 Jul 2020 13:36:25 +0200
Subject: [PATCH 096/771] [lldb][NFC] Refactor instruction dumping out of
 DumpDataExtractor

---
 lldb/source/Core/DumpDataExtractor.cpp | 88 +++++++++++++++-----------
 1 file changed, 50 insertions(+), 38 deletions(-)

diff --git a/lldb/source/Core/DumpDataExtractor.cpp b/lldb/source/Core/DumpDataExtractor.cpp
index 233a1b3735508..33fc3a76d3d67 100644
--- a/lldb/source/Core/DumpDataExtractor.cpp
+++ b/lldb/source/Core/DumpDataExtractor.cpp
@@ -128,6 +128,53 @@ static lldb::offset_t DumpAPInt(Stream *s, const DataExtractor &data,
   return offset;
 }
 
+/// Dumps decoded instructions to a stream.
+static lldb::offset_t DumpInstructions(const DataExtractor &DE, Stream *s,
+                                       ExecutionContextScope *exe_scope,
+                                       offset_t start_offset,
+                                       uint64_t base_addr,
+                                       size_t number_of_instructions) {
+  offset_t offset = start_offset;
+
+  TargetSP target_sp;
+  if (exe_scope)
+    target_sp = exe_scope->CalculateTarget();
+  if (target_sp) {
+    DisassemblerSP disassembler_sp(
+        Disassembler::FindPlugin(target_sp->GetArchitecture(),
+                                 target_sp->GetDisassemblyFlavor(), nullptr));
+    if (disassembler_sp) {
+      lldb::addr_t addr = base_addr + start_offset;
+      lldb_private::Address so_addr;
+      bool data_from_file = true;
+      if (target_sp->GetSectionLoadList().ResolveLoadAddress(addr, so_addr)) {
+        data_from_file = false;
+      } else {
+        if (target_sp->GetSectionLoadList().IsEmpty() ||
+            !target_sp->GetImages().ResolveFileAddress(addr, so_addr))
+          so_addr.SetRawAddress(addr);
+      }
+
+      size_t bytes_consumed = disassembler_sp->DecodeInstructions(
+          so_addr, DE, start_offset, number_of_instructions, false,
+          data_from_file);
+
+      if (bytes_consumed) {
+        offset += bytes_consumed;
+        const bool show_address = base_addr != LLDB_INVALID_ADDRESS;
+        const bool show_bytes = true;
+        ExecutionContext exe_ctx;
+        exe_scope->CalculateExecutionContext(exe_ctx);
+        disassembler_sp->GetInstructionList().Dump(s, show_address, show_bytes,
+                                                   &exe_ctx);
+      }
+    }
+  } else
+    s->Printf("invalid target");
+
+  return offset;
+}
+
 lldb::offset_t lldb_private::DumpDataExtractor(
     const DataExtractor &DE, Stream *s, offset_t start_offset,
     lldb::Format item_format, size_t item_byte_size, size_t item_count,
@@ -147,44 +194,9 @@ lldb::offset_t lldb_private::DumpDataExtractor(
 
   offset_t offset = start_offset;
 
-  if (item_format == eFormatInstruction) {
-    TargetSP target_sp;
-    if (exe_scope)
-      target_sp = exe_scope->CalculateTarget();
-    if (target_sp) {
-      DisassemblerSP disassembler_sp(Disassembler::FindPlugin(
-          target_sp->GetArchitecture(),
-          target_sp->GetDisassemblyFlavor(), nullptr));
-      if (disassembler_sp) {
-        lldb::addr_t addr = base_addr + start_offset;
-        lldb_private::Address so_addr;
-        bool data_from_file = true;
-        if (target_sp->GetSectionLoadList().ResolveLoadAddress(addr, so_addr)) {
-          data_from_file = false;
-        } else {
-          if (target_sp->GetSectionLoadList().IsEmpty() ||
-              !target_sp->GetImages().ResolveFileAddress(addr, so_addr))
-            so_addr.SetRawAddress(addr);
-        }
-
-        size_t bytes_consumed = disassembler_sp->DecodeInstructions(
-            so_addr, DE, start_offset, item_count, false, data_from_file);
-
-        if (bytes_consumed) {
-          offset += bytes_consumed;
-          const bool show_address = base_addr != LLDB_INVALID_ADDRESS;
-          const bool show_bytes = true;
-          ExecutionContext exe_ctx;
-          exe_scope->CalculateExecutionContext(exe_ctx);
-          disassembler_sp->GetInstructionList().Dump(s, show_address,
-                                                     show_bytes, &exe_ctx);
-        }
-      }
-    } else
-      s->Printf("invalid target");
-
-    return offset;
-  }
+  if (item_format == eFormatInstruction)
+    return DumpInstructions(DE, s, exe_scope, start_offset, base_addr,
+                            item_count);
 
   if ((item_format == eFormatOSType || item_format == eFormatAddressInfo) &&
       item_byte_size > 8)

From 725412f1f54aca6d465b61191b420c0aee0cef38 Mon Sep 17 00:00:00 2001
From: Georgii Rymar 
Date: Thu, 9 Jul 2020 16:03:40 +0300
Subject: [PATCH 097/771] [yaml2obj] - Refactor header-sh-fields.yaml test.

This refines the test to use macros. It is needed for
a follow-up change that adds a functionality to
override more fields.

Also, it is just cleaner to test each key separately.

Differential revision: https://reviews.llvm.org/D83481
---
 .../tools/yaml2obj/ELF/header-sh-fields.yaml  | 64 ++++++++++---------
 1 file changed, 34 insertions(+), 30 deletions(-)

diff --git a/llvm/test/tools/yaml2obj/ELF/header-sh-fields.yaml b/llvm/test/tools/yaml2obj/ELF/header-sh-fields.yaml
index 821b77418857c..166c68405bb76 100644
--- a/llvm/test/tools/yaml2obj/ELF/header-sh-fields.yaml
+++ b/llvm/test/tools/yaml2obj/ELF/header-sh-fields.yaml
@@ -3,8 +3,8 @@
 
 ## First we check the default values.
 
-# RUN: yaml2obj --docnum=1 %s -o %t1
-# RUN: llvm-readelf --file-headers %t1 | FileCheck %s --check-prefix=DEFAULT
+# RUN: yaml2obj %s -o %t-default
+# RUN: llvm-readelf --file-headers %t-default | FileCheck %s --check-prefix=DEFAULT
 
 # DEFAULT:   Start of section headers:          88 (bytes into file)
 # DEFAULT:   Size of section headers:           64 (bytes)
@@ -18,15 +18,10 @@ FileHeader:
   Type:    ET_REL
   Machine: EM_X86_64
 
-## Override 3 fields: e_shoff, e_shnum and e_shstrndx. Check the output.
-
-# RUN: yaml2obj --docnum=2 %s -o %t2
-# RUN: llvm-readelf --file-headers %t2 | FileCheck %s --check-prefix=CUSTOM
-
-# CUSTOM: Start of section headers:          2 (bytes into file)
-# CUSTOM: Size of section headers:           64 (bytes)
-# CUSTOM: Number of section headers:         3
-# CUSTOM: Section header string table index: 4
+## Check we can override all default values using the same values
+## and that this does not change the output.
+# RUN: yaml2obj --docnum=2 %s -o %t-default-override
+# RUN: cmp %t-default %t-default-override
 
 --- !ELF
 FileHeader:
@@ -34,28 +29,37 @@ FileHeader:
   Data:      ELFDATA2LSB
   Type:      ET_REL
   Machine:   EM_X86_64
-  SHEntSize: 64
-  SHOff:     2
-  SHNum:     3
-  SHStrNdx:  4
+  SHEntSize: [[SHENTSIZE=64]]
+  SHOff:     [[SHOFF=88]]
+  SHNum:     [[SHNUM=3]]
+  SHStrNdx:  [[SHSTRNDX=2]]
+
+## Override different fields to check the output produced.
+
+## Override the e_shoff field.
+# RUN: yaml2obj --docnum=2 %s -DSHOFF=3 -o %t2
+# RUN: llvm-readelf --file-headers %t2 | FileCheck %s --check-prefix=SHOFF
+
+# SHOFF: Start of section headers: 3 (bytes into file)
+
+## Override the e_shnum field.
+# RUN: yaml2obj --docnum=2 %s -DSHNUM=2 -o %t3
+# RUN: llvm-readelf --file-headers %t3 | FileCheck %s --check-prefix=SHNUM
+
+# SHNUM: Number of section headers: 2{{$}}
+
+## Override the e_shstrndx field.
+# RUN: yaml2obj --docnum=2 %s -DSHSTRNDX=4 -o %t4
+# RUN: llvm-readelf --file-headers %t4 | FileCheck %s --check-prefix=SHSTRNDX
 
-## Finally, we use the same YAML as above, but set e_shentsize to 1.
+# SHSTRNDX: Section header string table index: 4{{$}}
+
+## Override the e_shentsize field.
 ## Check the result using raw output from 'od' because llvm-readelf
 ## is unable to dump such headers.
 
-# RUN: yaml2obj --docnum=3 %s -o %t3
-# RUN: od -A n -t x1 -v -j 0x3a -N 1 %t3 | FileCheck %s --check-prefix=NEWSIZE
-# RUN: od -A n -t x1 -v -j 0x3a -N 1 %t2 | FileCheck %s --check-prefix=OLDSIZE
+# RUN: yaml2obj --docnum=2 %s -DSHENTSIZE=1 -o %t5
+# RUN: od -A n -t x1 -v -j 0x3a -N 1 %t5 | FileCheck %s --check-prefix=NEWSIZE
+# RUN: od -A n -t x1 -v -j 0x3a -N 1 %t-default | FileCheck %s --check-prefix=OLDSIZE
 # NEWSIZE: 01
 # OLDSIZE: 40
-
---- !ELF
-FileHeader:
-  Class:     ELFCLASS64
-  Data:      ELFDATA2LSB
-  Type:      ET_REL
-  Machine:   EM_X86_64
-  SHEntSize: 1
-  SHOff:     2
-  SHNum:     3
-  SHStrNdx:  4

From 2e58004fe1873825cce772113f96339eecc0bb3a Mon Sep 17 00:00:00 2001
From: Sanne Wouda 
Date: Thu, 25 Jun 2020 16:08:13 +0100
Subject: [PATCH 098/771] Fix crash when getVFABIMappings is called with an
 indirect call instruction

Differential Revision: https://reviews.llvm.org/D83122
---
 llvm/include/llvm/Analysis/VectorUtils.h      |  3 +++
 .../Analysis/VectorFunctionABITest.cpp        | 26 +++++++++++++++++++
 2 files changed, 29 insertions(+)

diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index 9acb1fcf11029..ce8327ae43a80 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -224,6 +224,9 @@ class VFDatabase {
   /// a vector Function ABI.
   static void getVFABIMappings(const CallInst &CI,
                                SmallVectorImpl &Mappings) {
+    if (CI.isIndirectCall())
+      return;
+
     const StringRef ScalarName = CI.getCalledFunction()->getName();
 
     SmallVector ListOfStrings;
diff --git a/llvm/unittests/Analysis/VectorFunctionABITest.cpp b/llvm/unittests/Analysis/VectorFunctionABITest.cpp
index 6668529f49e09..d1f878754cbba 100644
--- a/llvm/unittests/Analysis/VectorFunctionABITest.cpp
+++ b/llvm/unittests/Analysis/VectorFunctionABITest.cpp
@@ -618,3 +618,29 @@ TEST_F(VFABIParserTest, ZeroIsInvalidVLEN) {
   EXPECT_FALSE(invokeParser("_ZGVsM0v_sin"));
   EXPECT_FALSE(invokeParser("_ZGVsN0v_sin"));
 }
+
+static std::unique_ptr parseIR(LLVMContext &C, const char *IR) {
+  SMDiagnostic Err;
+  std::unique_ptr Mod = parseAssemblyString(IR, Err, C);
+  if (!Mod)
+    Err.print("VectorFunctionABITests", errs());
+  return Mod;
+}
+
+TEST(VFABIGetMappingsTest, IndirectCallInst) {
+  LLVMContext C;
+  std::unique_ptr M = parseIR(C, R"IR(
+define void @call(void () * %f) {
+entry:
+  call void %f()
+  ret void
+}
+)IR");
+  auto F = dyn_cast_or_null(M->getNamedValue("call"));
+  ASSERT_TRUE(F);
+  auto CI = dyn_cast(&F->front().front());
+  ASSERT_TRUE(CI);
+  ASSERT_TRUE(CI->isIndirectCall());
+  auto Mappings = VFDatabase::getMappings(*CI);
+  EXPECT_EQ(Mappings.size(), (unsigned)0);
+}

From e909f6bc48ee291f4be0d69a5eb0b80b96f7d16e Mon Sep 17 00:00:00 2001
From: Sanne Wouda 
Date: Thu, 25 Jun 2020 16:02:52 +0100
Subject: [PATCH 099/771] Pre-commit tests

Prepare to land D82550
---
 .../AArch64/accelerate-vector-functions.ll    | 1213 ++++++++++++++++-
 .../SLPVectorizer/vectorizable-functions.ll   |   81 ++
 2 files changed, 1266 insertions(+), 28 deletions(-)
 create mode 100644 llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll

diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
index 0adc695a13417..1cd93d20f85e8 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
@@ -5,55 +5,1212 @@
 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
 target triple = "arm64-apple-ios14.0.0"
 
-declare float @llvm.sin.f32(float) #1
-
+declare float @llvm.sin.f32(float)
 
 ; Accelerate provides sin() for <4 x float>
+define <4 x float> @int_sin_4x(<4 x float>* %a) {
+; CHECK-LABEL: @int_sin_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @int_sin_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.sin.f32(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.sin.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.sin.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+
+declare float @ceilf(float) readonly
+
+define <4 x float> @ceil_4x(<4 x float>* %a) {
+; CHECK-LABEL: @ceil_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @ceil_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP0]])
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; NOACCELERATE-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @ceilf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @ceilf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @ceilf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @ceilf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+
+declare float @fabsf(float) readonly
+
+define <4 x float> @fabs_4x(<4 x float>* %a) {
+; CHECK-LABEL: @fabs_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @fabs_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; NOACCELERATE-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @fabsf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @fabsf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @fabsf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @fabsf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @llvm.fabs.f32(float)
+define <4 x float> @int_fabs_4x(<4 x float>* %a) {
+; CHECK-LABEL: @int_fabs_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @int_fabs_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.fabs.v4f32(<4 x float> [[TMP0]])
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; NOACCELERATE-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @llvm.fabs.f32(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @llvm.fabs.f32(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @llvm.fabs.f32(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @llvm.fabs.f32(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @floorf(float) readonly
+define <4 x float> @floor_4x(<4 x float>* %a) {
+; CHECK-LABEL: @floor_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @floor_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP0]])
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; NOACCELERATE-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @floorf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @floorf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @floorf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @floorf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @sqrtf(float) readonly
+define <4 x float> @sqrt_4x(<4 x float>* %a) {
+; CHECK-LABEL: @sqrt_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @sqrt_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP0]])
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; NOACCELERATE-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @sqrtf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @sqrtf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @sqrtf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @sqrtf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @expf(float) readonly
+define <4 x float> @exp_4x(<4 x float>* %a) {
+; CHECK-LABEL: @exp_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vexpf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @exp_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @expf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @expf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @expf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @expf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @expf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @expf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @expf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @expf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @expm1f(float) readonly
+define <4 x float> @expm1_4x(<4 x float>* %a) {
+; CHECK-LABEL: @expm1_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @expm1f(float [[VECEXT]]) #2
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @expm1f(float [[VECEXT_1]]) #2
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @expm1f(float [[VECEXT_2]]) #2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @expm1f(float [[VECEXT_3]]) #2
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @expm1_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @expm1f(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @expm1f(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @expm1f(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @expm1f(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @expm1f(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @expm1f(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @expm1f(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @expm1f(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @logf(float) readonly
+define <4 x float> @log_4x(<4 x float>* %a) {
+; CHECK-LABEL: @log_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlogf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @log_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @logf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @logf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @logf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @logf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @logf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @logf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @logf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @logf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @log1pf(float) readonly
+define <4 x float> @log1p_4x(<4 x float>* %a) {
+; CHECK-LABEL: @log1p_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @log1pf(float [[VECEXT]]) #3
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @log1pf(float [[VECEXT_1]]) #3
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @log1pf(float [[VECEXT_2]]) #3
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @log1pf(float [[VECEXT_3]]) #3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @log1p_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @log1pf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @log1pf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @log1pf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @log1pf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @log1pf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @log1pf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @log1pf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @log1pf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @log10pf(float) readonly
+define <4 x float> @log10p_4x(<4 x float>* %a) {
+; CHECK-LABEL: @log10p_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]])
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @log10pf(float [[VECEXT_1]])
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @log10pf(float [[VECEXT_2]])
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @log10pf(float [[VECEXT_3]])
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @log10p_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @log10pf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @log10pf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @log10pf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @log10pf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @log10pf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @log10pf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @log10pf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @log10pf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @logbf(float) readonly
+define <4 x float> @logb_4x(<4 x float>* %a) {
+; CHECK-LABEL: @logb_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @logbf(float [[VECEXT]]) #4
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @logbf(float [[VECEXT_1]]) #4
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @logbf(float [[VECEXT_2]]) #4
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @logbf(float [[VECEXT_3]]) #4
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @logb_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @logbf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @logbf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @logbf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @logbf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @logbf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @logbf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @logbf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @logbf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @sinf(float) readonly
 define <4 x float> @sin_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @sin_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @sin_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @sinf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @sinf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @sinf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @sinf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @sinf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @sinf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @sinf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @sinf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @cosf(float) readonly
+define <4 x float> @cos_4x(<4 x float>* %a) {
+; CHECK-LABEL: @cos_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @cos_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @cosf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @cosf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @cosf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @cosf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @cosf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @cosf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @cosf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @cosf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @tanf(float) readonly
+define <4 x float> @tan_4x(<4 x float>* %a) {
+; CHECK-LABEL: @tan_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]]) #5
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]]) #5
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]]) #5
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]]) #5
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @tan_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @tanf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @tanf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @tanf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @tanf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @asinf(float) readonly
+define <4 x float> @asin_4x(<4 x float>* %a) {
+; CHECK-LABEL: @asin_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]]) #6
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]]) #6
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]]) #6
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]]) #6
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
-; NOACCELERATE-LABEL: @sin_4x(
+; NOACCELERATE-LABEL: @asin_4x(
 ; NOACCELERATE-NEXT:  entry:
 ; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]])
 ; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
 ; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]])
 ; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
 ; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]])
 ; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
 ; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]])
 ; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
 ; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:
   %0 = load <4 x float>, <4 x float>* %a, align 16
   %vecext = extractelement <4 x float> %0, i32 0
-  %1 = tail call fast float @llvm.sin.f32(float %vecext)
+  %1 = tail call fast float @asinf(float %vecext)
   %vecins = insertelement <4 x float> undef, float %1, i32 0
   %vecext.1 = extractelement <4 x float> %0, i32 1
-  %2 = tail call fast float @llvm.sin.f32(float %vecext.1)
+  %2 = tail call fast float @asinf(float %vecext.1)
   %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
   %vecext.2 = extractelement <4 x float> %0, i32 2
-  %3 = tail call fast float @llvm.sin.f32(float %vecext.2)
+  %3 = tail call fast float @asinf(float %vecext.2)
   %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
   %vecext.3 = extractelement <4 x float> %0, i32 3
-  %4 = tail call fast float @llvm.sin.f32(float %vecext.3)
+  %4 = tail call fast float @asinf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @acosf(float) readonly
+define <4 x float> @acos_4x(<4 x float>* %a) {
+; CHECK-LABEL: @acos_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]]) #7
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]]) #7
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]]) #7
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]]) #7
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @acos_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @acosf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @acosf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @acosf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @acosf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @atanf(float) readonly
+define <4 x float> @atan_4x(<4 x float>* %a) {
+; CHECK-LABEL: @atan_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]]) #8
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]]) #8
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]]) #8
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]]) #8
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @atan_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @atanf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @atanf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @atanf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @atanf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @sinhf(float) readonly
+define <4 x float> @sinh_4x(<4 x float>* %a) {
+; CHECK-LABEL: @sinh_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]]) #9
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]]) #9
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]]) #9
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]]) #9
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @sinh_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @sinhf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @sinhf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @sinhf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @sinhf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @coshf(float) readonly
+define <4 x float> @cosh_4x(<4 x float>* %a) {
+; CHECK-LABEL: @cosh_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]]) #10
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]]) #10
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]]) #10
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]]) #10
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @cosh_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @coshf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @coshf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @coshf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @coshf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @tanhf(float) readonly
+define <4 x float> @tanh_4x(<4 x float>* %a) {
+; CHECK-LABEL: @tanh_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]]) #11
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]]) #11
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]]) #11
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]]) #11
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @tanh_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @tanhf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @tanhf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @tanhf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @tanhf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @asinhf(float) readonly
+define <4 x float> @asinh_4x(<4 x float>* %a) {
+; CHECK-LABEL: @asinh_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]]) #12
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]]) #12
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]]) #12
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]]) #12
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @asinh_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @asinhf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @asinhf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @asinhf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @asinhf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @acoshf(float) readonly
+define <4 x float> @acosh_4x(<4 x float>* %a) {
+; CHECK-LABEL: @acosh_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]]) #13
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]]) #13
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]]) #13
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]]) #13
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @acosh_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @acoshf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @acoshf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @acoshf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @acoshf(float %vecext.3)
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+declare float @atanhf(float) readonly
+define <4 x float> @atanh_4x(<4 x float>* %a) {
+; CHECK-LABEL: @atanh_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]]) #14
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]]) #14
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]]) #14
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]]) #14
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+; NOACCELERATE-LABEL: @atanh_4x(
+; NOACCELERATE-NEXT:  entry:
+; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; NOACCELERATE-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]])
+; NOACCELERATE-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; NOACCELERATE-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; NOACCELERATE-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]])
+; NOACCELERATE-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; NOACCELERATE-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; NOACCELERATE-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]])
+; NOACCELERATE-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; NOACCELERATE-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; NOACCELERATE-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]])
+; NOACCELERATE-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; NOACCELERATE-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @atanhf(float %vecext)
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @atanhf(float %vecext.1)
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @atanhf(float %vecext.2)
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @atanhf(float %vecext.3)
   %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
   ret <4 x float> %vecins.3
 }
@@ -64,10 +1221,10 @@ define <2 x float> @sin_2x(<2 x float>* %a) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
 ; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) #1
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) #15
 ; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
 ; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) #1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) #15
 ; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
 ; CHECK-NEXT:    ret <2 x float> [[VECINS_1]]
 ;
@@ -94,11 +1251,11 @@ entry:
 }
 
 
-declare float @llvm.cos.f32(float) #1
+declare float @llvm.cos.f32(float)
 
 ; Accelerate provides cos() for <4 x float>
-define <4 x float> @cos_4x(<4 x float>* %a) {
-; CHECK-LABEL: @cos_4x(
+define <4 x float> @int_cos_4x(<4 x float>* %a) {
+; CHECK-LABEL: @int_cos_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
 ; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcosf(<4 x float> [[TMP0]])
@@ -112,7 +1269,7 @@ define <4 x float> @cos_4x(<4 x float>* %a) {
 ; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
-; NOACCELERATE-LABEL: @cos_4x(
+; NOACCELERATE-LABEL: @int_cos_4x(
 ; NOACCELERATE-NEXT:  entry:
 ; NOACCELERATE-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
 ; NOACCELERATE-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
@@ -152,10 +1309,10 @@ define <2 x float> @cos_2x(<2 x float>* %a) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
 ; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) #2
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) #16
 ; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
 ; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) #2
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) #16
 ; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
 ; CHECK-NEXT:    ret <2 x float> [[VECINS_1]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll b/llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll
new file mode 100644
index 0000000000000..bb27efbf424ec
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll
@@ -0,0 +1,81 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S %s | FileCheck %s
+
+declare float @memread(float) readonly #0
+declare <4 x float> @vmemread(<4 x float>)
+
+define <4 x float> @memread_4x(<4 x float>* %a) {
+; CHECK-LABEL: @memread_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @memread(float [[VECEXT]]) #2
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @memread(float [[VECEXT_1]]) #2
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @memread(float [[VECEXT_2]]) #2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @memread(float [[VECEXT_3]]) #2
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @memread(float %vecext) #0
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @memread(float %vecext.1) #0
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @memread(float %vecext.2) #0
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @memread(float %vecext.3) #0
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+
+declare float @memwrite(float) #1
+declare <4 x float> @vmemwrite(<4 x float>)
+
+define <4 x float> @memwrite_4x(<4 x float>* %a) {
+; CHECK-LABEL: @memwrite_4x(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
+; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @memwrite(float [[VECEXT]]) #1
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @memwrite(float [[VECEXT_1]]) #1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
+; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
+; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @memwrite(float [[VECEXT_2]]) #1
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
+; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
+; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @memwrite(float [[VECEXT_3]]) #1
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
+;
+entry:
+  %0 = load <4 x float>, <4 x float>* %a, align 16
+  %vecext = extractelement <4 x float> %0, i32 0
+  %1 = tail call fast float @memwrite(float %vecext) #1
+  %vecins = insertelement <4 x float> undef, float %1, i32 0
+  %vecext.1 = extractelement <4 x float> %0, i32 1
+  %2 = tail call fast float @memwrite(float %vecext.1) #1
+  %vecins.1 = insertelement <4 x float> %vecins, float %2, i32 1
+  %vecext.2 = extractelement <4 x float> %0, i32 2
+  %3 = tail call fast float @memwrite(float %vecext.2) #1
+  %vecins.2 = insertelement <4 x float> %vecins.1, float %3, i32 2
+  %vecext.3 = extractelement <4 x float> %0, i32 3
+  %4 = tail call fast float @memwrite(float %vecext.3) #1
+  %vecins.3 = insertelement <4 x float> %vecins.2, float %4, i32 3
+  ret <4 x float> %vecins.3
+}
+
+attributes #0 = { "vector-function-abi-variant"="_ZGV_LLVM_N4v_memread(vmemread)" }
+attributes #1 = { "vector-function-abi-variant"="_ZGV_LLVM_N4v_memwrite(vmemwrite)" }

From 7b84045565bdf7945a2cddd4dd7eefa00fb220d3 Mon Sep 17 00:00:00 2001
From: Sanne Wouda 
Date: Fri, 10 Jul 2020 12:37:26 +0100
Subject: [PATCH 100/771] [SLPVectorizer] handle vectorizeable library
 functions

Teaches the SLPVectorizer to use vectorized library functions for
non-intrinsic calls.

This already worked for intrinsics that have vectorized library
functions, thanks to D75878, but schedules with library functions with a
vector variant were being rejected early.

-   assume that there are no load/store dependencies between lib
    functions with a vector variant; this would otherwise prevent the
    bundle from becoming "ready"

-   check during legalization that the vector variant can be used

-   fix-up where we previously assumed that a call would be an intrinsic

Differential Revision: https://reviews.llvm.org/D82550
---
 .../Transforms/Vectorize/SLPVectorizer.cpp    |  27 +-
 .../AArch64/accelerate-vector-functions.ll    | 281 ++++++++----------
 .../SLPVectorizer/vectorizable-functions.ll   |  21 +-
 3 files changed, 148 insertions(+), 181 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index d4b16fac985d9..fe9ea2995377c 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3022,12 +3022,17 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth,
       return;
     }
     case Instruction::Call: {
-      // Check if the calls are all to the same vectorizable intrinsic.
+      // Check if the calls are all to the same vectorizable intrinsic or
+      // library function.
       CallInst *CI = cast(VL0);
-      // Check if this is an Intrinsic call or something that can be
-      // represented by an intrinsic call
       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
-      if (!isTriviallyVectorizable(ID)) {
+
+      VFShape Shape = VFShape::get(
+          *CI, {static_cast(VL.size()), false /*Scalable*/},
+          false /*HasGlobalPred*/);
+      Function *VecFunc = VFDatabase(*CI).getVectorizedFunction(Shape);
+
+      if (!VecFunc && !isTriviallyVectorizable(ID)) {
         BS.cancelScheduling(VL, VL0);
         newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
                      ReuseShuffleIndicies);
@@ -3044,6 +3049,8 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth,
         CallInst *CI2 = dyn_cast(V);
         if (!CI2 || CI2->getCalledFunction() != Int ||
             getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
+            (VecFunc &&
+             VecFunc != VFDatabase(*CI2).getVectorizedFunction(Shape)) ||
             !CI->hasIdenticalOperandBundleSchema(*CI2)) {
           BS.cancelScheduling(VL, VL0);
           newTreeEntry(VL, None /*not vectorized*/, S, UserTreeIdx,
@@ -4507,7 +4514,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
       Intrinsic::ID ID = getVectorIntrinsicIDForCall(CI, TLI);
 
       auto VecCallCosts = getVectorCallCosts(CI, VecTy, TTI, TLI);
-      bool UseIntrinsic = VecCallCosts.first <= VecCallCosts.second;
+      bool UseIntrinsic = ID != Intrinsic::not_intrinsic &&
+                          VecCallCosts.first <= VecCallCosts.second;
 
       Value *ScalarArg = nullptr;
       std::vector OpVecs;
@@ -4527,15 +4535,16 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
         OpVecs.push_back(OpVec);
       }
 
-      Module *M = F->getParent();
-      Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())};
-      Function *CF = Intrinsic::getDeclaration(M, ID, Tys);
-
+      Function *CF;
       if (!UseIntrinsic) {
         VFShape Shape = VFShape::get(
             *CI, {static_cast(VecTy->getNumElements()), false},
             false /*HasGlobalPred*/);
         CF = VFDatabase(*CI).getVectorizedFunction(Shape);
+      } else {
+        Module *M = F->getParent();
+        Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())};
+        CF = Intrinsic::getDeclaration(M, ID, Tys);
       }
 
       SmallVector OpBundles;
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
index 1cd93d20f85e8..811f414742f8e 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/accelerate-vector-functions.ll
@@ -344,18 +344,15 @@ define <4 x float> @expm1_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @expm1_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @expm1f(float [[VECEXT]]) #2
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @expm1f(float [[VECEXT_1]]) #2
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @expm1f(float [[VECEXT_2]]) #2
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @expm1f(float [[VECEXT_3]]) #2
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vexpm1f(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @expm1_4x(
@@ -445,18 +442,15 @@ define <4 x float> @log1p_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @log1p_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @log1pf(float [[VECEXT]]) #3
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @log1pf(float [[VECEXT_1]]) #3
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @log1pf(float [[VECEXT_2]]) #3
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @log1pf(float [[VECEXT_3]]) #3
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlog1pf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @log1p_4x(
@@ -549,18 +543,15 @@ define <4 x float> @logb_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @logb_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @logbf(float [[VECEXT]]) #4
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @logbf(float [[VECEXT_1]]) #4
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @logbf(float [[VECEXT_2]]) #4
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @logbf(float [[VECEXT_3]]) #4
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vlogbf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @logb_4x(
@@ -699,18 +690,15 @@ define <4 x float> @tan_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @tan_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @tanf(float [[VECEXT]]) #5
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @tanf(float [[VECEXT_1]]) #5
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @tanf(float [[VECEXT_2]]) #5
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @tanf(float [[VECEXT_3]]) #5
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @tan_4x(
@@ -751,18 +739,15 @@ define <4 x float> @asin_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @asin_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @asinf(float [[VECEXT]]) #6
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @asinf(float [[VECEXT_1]]) #6
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @asinf(float [[VECEXT_2]]) #6
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @asinf(float [[VECEXT_3]]) #6
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @asin_4x(
@@ -803,18 +788,15 @@ define <4 x float> @acos_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @acos_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @acosf(float [[VECEXT]]) #7
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @acosf(float [[VECEXT_1]]) #7
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @acosf(float [[VECEXT_2]]) #7
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @acosf(float [[VECEXT_3]]) #7
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacosf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @acos_4x(
@@ -855,18 +837,15 @@ define <4 x float> @atan_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @atan_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanf(float [[VECEXT]]) #8
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanf(float [[VECEXT_1]]) #8
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanf(float [[VECEXT_2]]) #8
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanf(float [[VECEXT_3]]) #8
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vatanf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @atan_4x(
@@ -907,18 +886,15 @@ define <4 x float> @sinh_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @sinh_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @sinhf(float [[VECEXT]]) #9
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @sinhf(float [[VECEXT_1]]) #9
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @sinhf(float [[VECEXT_2]]) #9
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @sinhf(float [[VECEXT_3]]) #9
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vsinhf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @sinh_4x(
@@ -959,18 +935,15 @@ define <4 x float> @cosh_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @cosh_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @coshf(float [[VECEXT]]) #10
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @coshf(float [[VECEXT_1]]) #10
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @coshf(float [[VECEXT_2]]) #10
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @coshf(float [[VECEXT_3]]) #10
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vcoshf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @cosh_4x(
@@ -1011,18 +984,15 @@ define <4 x float> @tanh_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @tanh_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @tanhf(float [[VECEXT]]) #11
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @tanhf(float [[VECEXT_1]]) #11
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @tanhf(float [[VECEXT_2]]) #11
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @tanhf(float [[VECEXT_3]]) #11
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vtanhf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @tanh_4x(
@@ -1063,18 +1033,15 @@ define <4 x float> @asinh_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @asinh_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @asinhf(float [[VECEXT]]) #12
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @asinhf(float [[VECEXT_1]]) #12
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @asinhf(float [[VECEXT_2]]) #12
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @asinhf(float [[VECEXT_3]]) #12
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vasinhf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @asinh_4x(
@@ -1115,18 +1082,15 @@ define <4 x float> @acosh_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @acosh_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @acoshf(float [[VECEXT]]) #13
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @acoshf(float [[VECEXT_1]]) #13
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @acoshf(float [[VECEXT_2]]) #13
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @acoshf(float [[VECEXT_3]]) #13
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vacoshf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @acosh_4x(
@@ -1167,18 +1131,15 @@ define <4 x float> @atanh_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @atanh_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @atanhf(float [[VECEXT]]) #14
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @atanhf(float [[VECEXT_1]]) #14
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @atanhf(float [[VECEXT_2]]) #14
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @atanhf(float [[VECEXT_3]]) #14
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vatanhf(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 ; NOACCELERATE-LABEL: @atanh_4x(
@@ -1221,10 +1182,10 @@ define <2 x float> @sin_2x(<2 x float>* %a) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
 ; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) #15
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT]]) #2
 ; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
 ; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) #15
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.sin.f32(float [[VECEXT_1]]) #2
 ; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
 ; CHECK-NEXT:    ret <2 x float> [[VECINS_1]]
 ;
@@ -1309,10 +1270,10 @@ define <2 x float> @cos_2x(<2 x float>* %a) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <2 x float>, <2 x float>* [[A:%.*]], align 16
 ; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <2 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) #16
+; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT]]) #3
 ; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <2 x float> undef, float [[TMP1]], i32 0
 ; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <2 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) #16
+; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @llvm.cos.f32(float [[VECEXT_1]]) #3
 ; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <2 x float> [[VECINS]], float [[TMP2]], i32 1
 ; CHECK-NEXT:    ret <2 x float> [[VECINS_1]]
 ;
diff --git a/llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll b/llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll
index bb27efbf424ec..ca724d46bea82 100644
--- a/llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll
+++ b/llvm/test/Transforms/SLPVectorizer/vectorizable-functions.ll
@@ -8,18 +8,15 @@ define <4 x float> @memread_4x(<4 x float>* %a) {
 ; CHECK-LABEL: @memread_4x(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x float>, <4 x float>* [[A:%.*]], align 16
-; CHECK-NEXT:    [[VECEXT:%.*]] = extractelement <4 x float> [[TMP0]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call fast float @memread(float [[VECEXT]]) #2
-; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP1]], i32 0
-; CHECK-NEXT:    [[VECEXT_1:%.*]] = extractelement <4 x float> [[TMP0]], i32 1
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call fast float @memread(float [[VECEXT_1]]) #2
-; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP2]], i32 1
-; CHECK-NEXT:    [[VECEXT_2:%.*]] = extractelement <4 x float> [[TMP0]], i32 2
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call fast float @memread(float [[VECEXT_2]]) #2
-; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP3]], i32 2
-; CHECK-NEXT:    [[VECEXT_3:%.*]] = extractelement <4 x float> [[TMP0]], i32 3
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call fast float @memread(float [[VECEXT_3]]) #2
-; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP4]], i32 3
+; CHECK-NEXT:    [[TMP1:%.*]] = call fast <4 x float> @vmemread(<4 x float> [[TMP0]])
+; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
+; CHECK-NEXT:    [[VECINS:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
+; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
+; CHECK-NEXT:    [[VECINS_1:%.*]] = insertelement <4 x float> [[VECINS]], float [[TMP3]], i32 1
+; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
+; CHECK-NEXT:    [[VECINS_2:%.*]] = insertelement <4 x float> [[VECINS_1]], float [[TMP4]], i32 2
+; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
+; CHECK-NEXT:    [[VECINS_3:%.*]] = insertelement <4 x float> [[VECINS_2]], float [[TMP5]], i32 3
 ; CHECK-NEXT:    ret <4 x float> [[VECINS_3]]
 ;
 entry:

From 13fec93a77fa159d700eff40b27402520eed2dfa Mon Sep 17 00:00:00 2001
From: Sanne Wouda 
Date: Fri, 3 Jul 2020 12:37:21 +0100
Subject: [PATCH 101/771] [NFC] rename to reflect F is not necessarily an
 Intrinsic

---
 llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index fe9ea2995377c..5bc35aa4695f8 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3039,7 +3039,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth,
         LLVM_DEBUG(dbgs() << "SLP: Non-vectorizable call.\n");
         return;
       }
-      Function *Int = CI->getCalledFunction();
+      Function *F = CI->getCalledFunction();
       unsigned NumArgs = CI->getNumArgOperands();
       SmallVector ScalarArgs(NumArgs, nullptr);
       for (unsigned j = 0; j != NumArgs; ++j)
@@ -3047,7 +3047,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth,
           ScalarArgs[j] = CI->getArgOperand(j);
       for (Value *V : VL) {
         CallInst *CI2 = dyn_cast(V);
-        if (!CI2 || CI2->getCalledFunction() != Int ||
+        if (!CI2 || CI2->getCalledFunction() != F ||
             getVectorIntrinsicIDForCall(CI2, TLI) != ID ||
             (VecFunc &&
              VecFunc != VFDatabase(*CI2).getVectorizedFunction(Shape)) ||
@@ -4542,9 +4542,8 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E) {
             false /*HasGlobalPred*/);
         CF = VFDatabase(*CI).getVectorizedFunction(Shape);
       } else {
-        Module *M = F->getParent();
         Type *Tys[] = {FixedVectorType::get(CI->getType(), E->Scalars.size())};
-        CF = Intrinsic::getDeclaration(M, ID, Tys);
+        CF = Intrinsic::getDeclaration(F->getParent(), ID, Tys);
       }
 
       SmallVector OpBundles;

From a5803765d8e0b62e0b48ea76bcad07a7c183618b Mon Sep 17 00:00:00 2001
From: Pavel Labath 
Date: Fri, 10 Jul 2020 13:18:00 +0200
Subject: [PATCH 102/771] [lldb/dotest] Remove the "xunit" result formatter

Summary:
My understanding is that this was added to make dotest interact well
with the GreenDragon bots, back when dotest was the main test driver.
Now that everything goes through lit (which has its own xunit
formatter), it seems largely irrelevant.

There are more cleanups that can be done after removing this, but this
should be enough to test the waters.

Reviewers: JDevlieghere

Subscribers: lldb-commits

Tags: #lldb

Differential Revision: https://reviews.llvm.org/D83545
---
 .../Python/lldbsuite/test/configuration.py    |   4 -
 lldb/packages/Python/lldbsuite/test/dotest.py |  21 +-
 .../Python/lldbsuite/test/dotest_args.py      |  22 -
 .../test_event/formatter/__init__.py          | 113 +---
 .../lldbsuite/test_event/formatter/xunit.py   | 595 ------------------
 5 files changed, 30 insertions(+), 725 deletions(-)
 delete mode 100644 lldb/packages/Python/lldbsuite/test_event/formatter/xunit.py

diff --git a/lldb/packages/Python/lldbsuite/test/configuration.py b/lldb/packages/Python/lldbsuite/test/configuration.py
index ca27864463002..84de0130f9907 100644
--- a/lldb/packages/Python/lldbsuite/test/configuration.py
+++ b/lldb/packages/Python/lldbsuite/test/configuration.py
@@ -122,10 +122,6 @@
 clang_module_cache_dir = None
 
 # Test results handling globals
-results_filename = None
-results_formatter_name = None
-results_formatter_object = None
-results_formatter_options = None
 test_result = None
 
 # Reproducers
diff --git a/lldb/packages/Python/lldbsuite/test/dotest.py b/lldb/packages/Python/lldbsuite/test/dotest.py
index 8238168d0fb6d..67f227cad7155 100644
--- a/lldb/packages/Python/lldbsuite/test/dotest.py
+++ b/lldb/packages/Python/lldbsuite/test/dotest.py
@@ -408,19 +408,6 @@ def parseOptionsAndInitTestdirs():
     if do_help:
         usage(parser)
 
-    if args.results_file:
-        configuration.results_filename = args.results_file
-
-    if args.results_formatter:
-        configuration.results_formatter_name = args.results_formatter
-    if args.results_formatter_options:
-        configuration.results_formatter_options = args.results_formatter_options
-
-    # Default to using the BasicResultsFormatter if no formatter is specified.
-    if configuration.results_formatter_name is None:
-        configuration.results_formatter_name = (
-            "lldbsuite.test_event.formatter.results_formatter.ResultsFormatter")
-
     # Reproducer arguments
     if args.capture_path and args.replay_path:
         logging.error('Cannot specify both a capture and a replay path.')
@@ -469,16 +456,10 @@ def parseOptionsAndInitTestdirs():
 
 def setupTestResults():
     """Sets up test results-related objects based on arg settings."""
-    # Setup the results formatter configuration.
-    formatter_config = formatter.FormatterConfig()
-    formatter_config.filename = configuration.results_filename
-    formatter_config.formatter_name = configuration.results_formatter_name
-    formatter_config.formatter_options = (
-        configuration.results_formatter_options)
 
     # Create the results formatter.
     formatter_spec = formatter.create_results_formatter(
-        formatter_config)
+            "lldbsuite.test_event.formatter.results_formatter.ResultsFormatter")
     if formatter_spec is not None and formatter_spec.formatter is not None:
         configuration.results_formatter_object = formatter_spec.formatter
 
diff --git a/lldb/packages/Python/lldbsuite/test/dotest_args.py b/lldb/packages/Python/lldbsuite/test/dotest_args.py
index d6f59efdf28b4..05dd523e744ac 100644
--- a/lldb/packages/Python/lldbsuite/test/dotest_args.py
+++ b/lldb/packages/Python/lldbsuite/test/dotest_args.py
@@ -244,28 +244,6 @@ def create_parser():
         help='(Windows only) When LLDB crashes, display the Windows crash dialog.')
     group.set_defaults(disable_crash_dialog=True)
 
-    # Test results support.
-    group = parser.add_argument_group('Test results options')
-    group.add_argument(
-        '--results-file',
-        action='store',
-        help=('Specifies the file where test results will be written '
-              'according to the results-formatter class used'))
-    group.add_argument(
-        '--results-formatter',
-        action='store',
-        help=('Specifies the full package/module/class name used to translate '
-              'test events into some kind of meaningful report, written to '
-              'the designated output results file-like object'))
-    group.add_argument(
-        '--results-formatter-option',
-        '-O',
-        action='append',
-        dest='results_formatter_options',
-        help=('Specify an option to pass to the formatter. '
-              'Use --results-formatter-option="--option1=val1" '
-              'syntax.  Note the "=" is critical, don\'t include whitespace.'))
-
     # Re-run related arguments
     group = parser.add_argument_group('Test Re-run Options')
     group.add_argument(
diff --git a/lldb/packages/Python/lldbsuite/test_event/formatter/__init__.py b/lldb/packages/Python/lldbsuite/test_event/formatter/__init__.py
index 1fe6ecd3ef82f..d6609d353c856 100644
--- a/lldb/packages/Python/lldbsuite/test_event/formatter/__init__.py
+++ b/lldb/packages/Python/lldbsuite/test_event/formatter/__init__.py
@@ -17,17 +17,6 @@
 # LLDB modules
 
 
-# Ignore method count on DTOs.
-# pylint: disable=too-few-public-methods
-class FormatterConfig(object):
-    """Provides formatter configuration info to create_results_formatter()."""
-
-    def __init__(self):
-        self.filename = None
-        self.formatter_name = None
-        self.formatter_options = None
-
-
 # Ignore method count on DTOs.
 # pylint: disable=too-few-public-methods
 class CreatedFormatter(object):
@@ -38,7 +27,7 @@ def __init__(self, formatter, cleanup_func):
         self.cleanup_func = cleanup_func
 
 
-def create_results_formatter(config):
+def create_results_formatter(formatter_name):
     """Sets up a test results formatter.
 
     @param config an instance of FormatterConfig
@@ -47,75 +36,31 @@ def create_results_formatter(config):
     @return an instance of CreatedFormatter.
     """
 
-    default_formatter_name = None
-    results_file_object = None
-    cleanup_func = None
-
-    if config.filename:
-        # Open the results file for writing.
-        if config.filename == 'stdout':
-            results_file_object = sys.stdout
-            cleanup_func = None
-        elif config.filename == 'stderr':
-            results_file_object = sys.stderr
-            cleanup_func = None
-        else:
-            results_file_object = open(config.filename, "w")
-            cleanup_func = results_file_object.close
-        default_formatter_name = (
-            "lldbsuite.test_event.formatter.xunit.XunitFormatter")
-
-    # If we have a results formatter name specified and we didn't specify
-    # a results file, we should use stdout.
-    if config.formatter_name is not None and results_file_object is None:
-        # Use stdout.
-        results_file_object = sys.stdout
-        cleanup_func = None
-
-    if results_file_object:
-        # We care about the formatter.  Choose user-specified or, if
-        # none specified, use the default for the output type.
-        if config.formatter_name:
-            formatter_name = config.formatter_name
-        else:
-            formatter_name = default_formatter_name
-
-        # Create an instance of the class.
-        # First figure out the package/module.
-        components = formatter_name.split(".")
-        module = importlib.import_module(".".join(components[:-1]))
-
-        # Create the class name we need to load.
-        cls = getattr(module, components[-1])
-
-        # Handle formatter options for the results formatter class.
-        formatter_arg_parser = cls.arg_parser()
-        if config.formatter_options and len(config.formatter_options) > 0:
-            command_line_options = config.formatter_options
-        else:
-            command_line_options = []
-
-        formatter_options = formatter_arg_parser.parse_args(
-            command_line_options)
-
-        # Create the TestResultsFormatter given the processed options.
-        results_formatter_object = cls(
-            results_file_object,
-            formatter_options)
-
-        def shutdown_formatter():
-            """Shuts down the formatter when it is no longer needed."""
-            # Tell the formatter to write out anything it may have
-            # been saving until the very end (e.g. xUnit results
-            # can't complete its output until this point).
-            results_formatter_object.send_terminate_as_needed()
-
-            # And now close out the output file-like object.
-            if cleanup_func is not None:
-                cleanup_func()
-
-        return CreatedFormatter(
-            results_formatter_object,
-            shutdown_formatter)
-    else:
-        return None
+    # Create an instance of the class.
+    # First figure out the package/module.
+    components = formatter_name.split(".")
+    module = importlib.import_module(".".join(components[:-1]))
+
+    # Create the class name we need to load.
+    cls = getattr(module, components[-1])
+
+    # Handle formatter options for the results formatter class.
+    formatter_arg_parser = cls.arg_parser()
+    command_line_options = []
+
+    formatter_options = formatter_arg_parser.parse_args(
+        command_line_options)
+
+    # Create the TestResultsFormatter given the processed options.
+    results_formatter_object = cls(sys.stdout, formatter_options)
+
+    def shutdown_formatter():
+        """Shuts down the formatter when it is no longer needed."""
+        # Tell the formatter to write out anything it may have
+        # been saving until the very end (e.g. xUnit results
+        # can't complete its output until this point).
+        results_formatter_object.send_terminate_as_needed()
+
+    return CreatedFormatter(
+        results_formatter_object,
+        shutdown_formatter)
diff --git a/lldb/packages/Python/lldbsuite/test_event/formatter/xunit.py b/lldb/packages/Python/lldbsuite/test_event/formatter/xunit.py
deleted file mode 100644
index e480df59a2f28..0000000000000
--- a/lldb/packages/Python/lldbsuite/test_event/formatter/xunit.py
+++ /dev/null
@@ -1,595 +0,0 @@
-"""
-Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-See https://llvm.org/LICENSE.txt for license information.
-SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-
-Provides an xUnit ResultsFormatter for integrating the LLDB
-test suite with the Jenkins xUnit aggregator and other xUnit-compliant
-test output processors.
-"""
-from __future__ import absolute_import
-from __future__ import print_function
-
-# System modules
-import re
-import sys
-import xml.sax.saxutils
-
-# Third-party modules
-import six
-
-# Local modules
-from ..event_builder import EventBuilder
-from ..build_exception import BuildError
-from .results_formatter import ResultsFormatter
-
-
-class XunitFormatter(ResultsFormatter):
-    """Provides xUnit-style formatted output.
-    """
-
-    # Result mapping arguments
-    RM_IGNORE = 'ignore'
-    RM_SUCCESS = 'success'
-    RM_FAILURE = 'failure'
-    RM_PASSTHRU = 'passthru'
-
-    @staticmethod
-    def _build_illegal_xml_regex():
-        """Constructs a regex to match all illegal xml characters.
-
-        Expects to be used against a unicode string."""
-        # Construct the range pairs of invalid unicode characters.
-        illegal_chars_u = [
-            (0x00, 0x08), (0x0B, 0x0C), (0x0E, 0x1F), (0x7F, 0x84),
-            (0x86, 0x9F), (0xFDD0, 0xFDDF), (0xFFFE, 0xFFFF)]
-
-        # For wide builds, we have more.
-        if sys.maxunicode >= 0x10000:
-            illegal_chars_u.extend(
-                [(0x1FFFE, 0x1FFFF), (0x2FFFE, 0x2FFFF), (0x3FFFE, 0x3FFFF),
-                 (0x4FFFE, 0x4FFFF), (0x5FFFE, 0x5FFFF), (0x6FFFE, 0x6FFFF),
-                 (0x7FFFE, 0x7FFFF), (0x8FFFE, 0x8FFFF), (0x9FFFE, 0x9FFFF),
-                 (0xAFFFE, 0xAFFFF), (0xBFFFE, 0xBFFFF), (0xCFFFE, 0xCFFFF),
-                 (0xDFFFE, 0xDFFFF), (0xEFFFE, 0xEFFFF), (0xFFFFE, 0xFFFFF),
-                 (0x10FFFE, 0x10FFFF)])
-
-        # Build up an array of range expressions.
-        illegal_ranges = [
-            "%s-%s" % (six.unichr(low), six.unichr(high))
-            for (low, high) in illegal_chars_u]
-
-        # Compile the regex
-        return re.compile(six.u('[%s]') % six.u('').join(illegal_ranges))
-
-    @staticmethod
-    def _quote_attribute(text):
-        """Returns the given text in a manner safe for usage in an XML attribute.
-
-        @param text the text that should appear within an XML attribute.
-        @return the attribute-escaped version of the input text.
-        """
-        return xml.sax.saxutils.quoteattr(text)
-
-    def _replace_invalid_xml(self, str_or_unicode):
-        """Replaces invalid XML characters with a '?'.
-
-        @param str_or_unicode a string to replace invalid XML
-        characters within.  Can be unicode or not.  If not unicode,
-        assumes it is a byte string in utf-8 encoding.
-
-        @returns a utf-8-encoded byte string with invalid
-        XML replaced with '?'.
-        """
-        # Get the content into unicode
-        if isinstance(str_or_unicode, str):
-            # If we hit decoding errors due to data corruption, replace the
-            # invalid characters with U+FFFD REPLACEMENT CHARACTER.
-            unicode_content = str_or_unicode.decode('utf-8', 'replace')
-        else:
-            unicode_content = str_or_unicode
-        return self.invalid_xml_re.sub(
-            six.u('?'), unicode_content).encode('utf-8')
-
-    @classmethod
-    def arg_parser(cls):
-        """@return arg parser used to parse formatter-specific options."""
-        parser = super(XunitFormatter, cls).arg_parser()
-
-        # These are valid choices for results mapping.
-        results_mapping_choices = [
-            XunitFormatter.RM_IGNORE,
-            XunitFormatter.RM_SUCCESS,
-            XunitFormatter.RM_FAILURE,
-            XunitFormatter.RM_PASSTHRU]
-        parser.add_argument(
-            "--assert-on-unknown-events",
-            action="store_true",
-            help=('cause unknown test events to generate '
-                  'a python assert.  Default is to ignore.'))
-        parser.add_argument(
-            "--ignore-skip-name",
-            "-n",
-            metavar='PATTERN',
-            action="append",
-            dest='ignore_skip_name_patterns',
-            help=('a python regex pattern, where '
-                  'any skipped test with a test method name where regex '
-                  'matches (via search) will be ignored for xUnit test '
-                  'result purposes.  Can be specified multiple times.'))
-        parser.add_argument(
-            "--ignore-skip-reason",
-            "-r",
-            metavar='PATTERN',
-            action="append",
-            dest='ignore_skip_reason_patterns',
-            help=('a python regex pattern, where '
-                  'any skipped test with a skip reason where the regex '
-                  'matches (via search) will be ignored for xUnit test '
-                  'result purposes.  Can be specified multiple times.'))
-        parser.add_argument(
-            "--xpass", action="store", choices=results_mapping_choices,
-            default=XunitFormatter.RM_FAILURE,
-            help=('specify mapping from unexpected success to jUnit/xUnit '
-                  'result type'))
-        parser.add_argument(
-            "--xfail", action="store", choices=results_mapping_choices,
-            default=XunitFormatter.RM_IGNORE,
-            help=('specify mapping from expected failure to jUnit/xUnit '
-                  'result type'))
-        return parser
-
-    @staticmethod
-    def _build_regex_list_from_patterns(patterns):
-        """Builds a list of compiled regular expressions from option value.
-
-        @param patterns contains a list of regular expression
-        patterns.
-
-        @return list of compiled regular expressions, empty if no
-        patterns provided.
-        """
-        regex_list = []
-        if patterns is not None:
-            for pattern in patterns:
-                regex_list.append(re.compile(pattern))
-        return regex_list
-
-    def __init__(self, out_file, options):
-        """Initializes the XunitFormatter instance.
-        @param out_file file-like object where formatted output is written.
-        @param options specifies a dictionary of options for the
-        formatter.
-        """
-        # Initialize the parent
-        super(XunitFormatter, self).__init__(out_file, options)
-        self.text_encoding = "UTF-8"
-        self.invalid_xml_re = XunitFormatter._build_illegal_xml_regex()
-        self.total_test_count = 0
-        self.ignore_skip_name_regexes = (
-            XunitFormatter._build_regex_list_from_patterns(
-                options.ignore_skip_name_patterns))
-        self.ignore_skip_reason_regexes = (
-            XunitFormatter._build_regex_list_from_patterns(
-                options.ignore_skip_reason_patterns))
-
-        self.elements = {
-            "successes": [],
-            "errors": [],
-            "failures": [],
-            "skips": [],
-            "unexpected_successes": [],
-            "expected_failures": [],
-            "all": []
-        }
-
-        self.status_handlers = {
-            EventBuilder.STATUS_SUCCESS: self._handle_success,
-            EventBuilder.STATUS_FAILURE: self._handle_failure,
-            EventBuilder.STATUS_ERROR: self._handle_error,
-            EventBuilder.STATUS_SKIP: self._handle_skip,
-            EventBuilder.STATUS_EXPECTED_FAILURE:
-                self._handle_expected_failure,
-            EventBuilder.STATUS_EXPECTED_TIMEOUT:
-                self._handle_expected_timeout,
-            EventBuilder.STATUS_UNEXPECTED_SUCCESS:
-                self._handle_unexpected_success,
-            EventBuilder.STATUS_EXCEPTIONAL_EXIT:
-                self._handle_exceptional_exit,
-            EventBuilder.STATUS_TIMEOUT:
-                self._handle_timeout
-        }
-
-    RESULT_TYPES = {
-        EventBuilder.TYPE_TEST_RESULT,
-        EventBuilder.TYPE_JOB_RESULT}
-
-    def handle_event(self, test_event):
-        super(XunitFormatter, self).handle_event(test_event)
-
-        event_type = test_event["event"]
-        if event_type is None:
-            return
-
-        if event_type == "terminate":
-            # Process all the final result events into their
-            # XML counterparts.
-            for result_event in self.result_events.values():
-                self._process_test_result(result_event)
-            self._finish_output()
-        else:
-            # This is an unknown event.
-            if self.options.assert_on_unknown_events:
-                raise Exception("unknown event type {} from {}\n".format(
-                    event_type, test_event))
-
-    def _handle_success(self, test_event):
-        """Handles a test success.
-        @param test_event the test event to handle.
-        """
-        result = self._common_add_testcase_entry(test_event)
-        with self.lock:
-            self.elements["successes"].append(result)
-
-    def _handle_failure(self, test_event):
-        """Handles a test failure.
-        @param test_event the test event to handle.
-        """
-        message = self._replace_invalid_xml(test_event["issue_message"])
-        backtrace = self._replace_invalid_xml(
-            "".join(test_event.get("issue_backtrace", [])))
-
-        result = self._common_add_testcase_entry(
-            test_event,
-            inner_content=(
-                ''.format(
-                    XunitFormatter._quote_attribute(test_event["issue_class"]),
-                    XunitFormatter._quote_attribute(message),
-                    backtrace)
-            ))
-        with self.lock:
-            self.elements["failures"].append(result)
-
-    def _handle_error_build(self, test_event):
-        """Handles a test error.
-        @param test_event the test event to handle.
-        """
-        message = self._replace_invalid_xml(test_event["issue_message"])
-        build_issue_description = self._replace_invalid_xml(
-            BuildError.format_build_error(
-                test_event.get("build_command", ""),
-                test_event.get("build_error", "")))
-
-        result = self._common_add_testcase_entry(
-            test_event,
-            inner_content=(
-                ''.format(
-                    XunitFormatter._quote_attribute(test_event["issue_class"]),
-                    XunitFormatter._quote_attribute(message),
-                    build_issue_description)
-            ))
-        with self.lock:
-            self.elements["errors"].append(result)
-
-    def _handle_error_standard(self, test_event):
-        """Handles a test error.
-        @param test_event the test event to handle.
-        """
-        message = self._replace_invalid_xml(test_event["issue_message"])
-        backtrace = self._replace_invalid_xml(
-            "".join(test_event.get("issue_backtrace", [])))
-
-        result = self._common_add_testcase_entry(
-            test_event,
-            inner_content=(
-                ''.format(
-                    XunitFormatter._quote_attribute(test_event["issue_class"]),
-                    XunitFormatter._quote_attribute(message),
-                    backtrace)
-            ))
-        with self.lock:
-            self.elements["errors"].append(result)
-
-    def _handle_error(self, test_event):
-        if test_event.get("issue_phase", None) == "build":
-            self._handle_error_build(test_event)
-        else:
-            self._handle_error_standard(test_event)
-
-    def _handle_exceptional_exit(self, test_event):
-        """Handles an exceptional exit.
-        @param test_event the test method or job result event to handle.
-        """
-        if "test_name" in test_event:
-            name = test_event["test_name"]
-        else:
-            name = test_event.get("test_filename", "")
-
-        message_text = "ERROR: {} ({}): {}".format(
-            test_event.get("exception_code", 0),
-            test_event.get("exception_description", ""),
-            name)
-        message = self._replace_invalid_xml(message_text)
-
-        result = self._common_add_testcase_entry(
-            test_event,
-            inner_content=(
-                ''.format(
-                    "exceptional_exit",
-                    XunitFormatter._quote_attribute(message))
-            ))
-        with self.lock:
-            self.elements["errors"].append(result)
-
-    def _handle_timeout(self, test_event):
-        """Handles a test method or job timeout.
-        @param test_event the test method or job result event to handle.
-        """
-        if "test_name" in test_event:
-            name = test_event["test_name"]
-        else:
-            name = test_event.get("test_filename", "")
-
-        message_text = "TIMEOUT: {}".format(name)
-        message = self._replace_invalid_xml(message_text)
-
-        result = self._common_add_testcase_entry(
-            test_event,
-            inner_content=(
-                ''.format(
-                    XunitFormatter._quote_attribute("timeout"),
-                    XunitFormatter._quote_attribute(message))
-            ))
-        with self.lock:
-            self.elements["errors"].append(result)
-
-    @staticmethod
-    def _ignore_based_on_regex_list(test_event, test_key, regex_list):
-        """Returns whether to ignore a test event based on patterns.
-
-        @param test_event the test event dictionary to check.
-        @param test_key the key within the dictionary to check.
-        @param regex_list a list of zero or more regexes.  May contain
-        zero or more compiled regexes.
-
-        @return True if any o the regex list match based on the
-        re.search() method; false otherwise.
-        """
-        for regex in regex_list:
-            match = regex.search(test_event.get(test_key, ''))
-            if match:
-                return True
-        return False
-
-    def _handle_skip(self, test_event):
-        """Handles a skipped test.
-        @param test_event the test event to handle.
-        """
-
-        # Are we ignoring this test based on test name?
-        if XunitFormatter._ignore_based_on_regex_list(
-                test_event, 'test_name', self.ignore_skip_name_regexes):
-            return
-
-        # Are we ignoring this test based on skip reason?
-        if XunitFormatter._ignore_based_on_regex_list(
-                test_event, 'skip_reason', self.ignore_skip_reason_regexes):
-            return
-
-        # We're not ignoring this test.  Process the skip.
-        reason = self._replace_invalid_xml(test_event.get("skip_reason", ""))
-        result = self._common_add_testcase_entry(
-            test_event,
-            inner_content=''.format(
-                XunitFormatter._quote_attribute(reason)))
-        with self.lock:
-            self.elements["skips"].append(result)
-
-    def _handle_expected_failure(self, test_event):
-        """Handles a test that failed as expected.
-        @param test_event the test event to handle.
-        """
-        if self.options.xfail == XunitFormatter.RM_PASSTHRU:
-            # This is not a natively-supported junit/xunit
-            # testcase mode, so it might fail a validating
-            # test results viewer.
-            if "bugnumber" in test_event:
-                bug_id_attribute = 'bug-id={} '.format(
-                    XunitFormatter._quote_attribute(test_event["bugnumber"]))
-            else:
-                bug_id_attribute = ''
-
-            result = self._common_add_testcase_entry(
-                test_event,
-                inner_content=(
-                    ''.format(
-                        bug_id_attribute,
-                        XunitFormatter._quote_attribute(
-                            test_event["issue_class"]),
-                        XunitFormatter._quote_attribute(
-                            test_event["issue_message"]))
-                ))
-            with self.lock:
-                self.elements["expected_failures"].append(result)
-        elif self.options.xfail == XunitFormatter.RM_SUCCESS:
-            result = self._common_add_testcase_entry(test_event)
-            with self.lock:
-                self.elements["successes"].append(result)
-        elif self.options.xfail == XunitFormatter.RM_FAILURE:
-            result = self._common_add_testcase_entry(
-                test_event,
-                inner_content=''.format(
-                    XunitFormatter._quote_attribute(test_event["issue_class"]),
-                    XunitFormatter._quote_attribute(
-                        test_event["issue_message"])))
-            with self.lock:
-                self.elements["failures"].append(result)
-        elif self.options.xfail == XunitFormatter.RM_IGNORE:
-            pass
-        else:
-            raise Exception(
-                "unknown xfail option: {}".format(self.options.xfail))
-
-    @staticmethod
-    def _handle_expected_timeout(test_event):
-        """Handles expected_timeout.
-        @param test_event the test event to handle.
-        """
-        # We don't do anything with expected timeouts, not even report.
-        pass
-
-    def _handle_unexpected_success(self, test_event):
-        """Handles a test that passed but was expected to fail.
-        @param test_event the test event to handle.
-        """
-        if self.options.xpass == XunitFormatter.RM_PASSTHRU:
-            # This is not a natively-supported junit/xunit
-            # testcase mode, so it might fail a validating
-            # test results viewer.
-            result = self._common_add_testcase_entry(
-                test_event,
-                inner_content="")
-            with self.lock:
-                self.elements["unexpected_successes"].append(result)
-        elif self.options.xpass == XunitFormatter.RM_SUCCESS:
-            # Treat the xpass as a success.
-            result = self._common_add_testcase_entry(test_event)
-            with self.lock:
-                self.elements["successes"].append(result)
-        elif self.options.xpass == XunitFormatter.RM_FAILURE:
-            # Treat the xpass as a failure.
-            if "bugnumber" in test_event:
-                message = "unexpected success (bug_id:{})".format(
-                    test_event["bugnumber"])
-            else:
-                message = "unexpected success (bug_id:none)"
-            result = self._common_add_testcase_entry(
-                test_event,
-                inner_content=''.format(
-                    XunitFormatter._quote_attribute("unexpected_success"),
-                    XunitFormatter._quote_attribute(message)))
-            with self.lock:
-                self.elements["failures"].append(result)
-        elif self.options.xpass == XunitFormatter.RM_IGNORE:
-            # Ignore the xpass result as far as xUnit reporting goes.
-            pass
-        else:
-            raise Exception("unknown xpass option: {}".format(
-                self.options.xpass))
-
-    def _process_test_result(self, test_event):
-        """Processes the test_event known to be a test result.
-
-        This categorizes the event appropriately and stores the data needed
-        to generate the final xUnit report.  This method skips events that
-        cannot be represented in xUnit output.
-        """
-        if "status" not in test_event:
-            raise Exception("test event dictionary missing 'status' key")
-
-        status = test_event["status"]
-        if status not in self.status_handlers:
-            raise Exception("test event status '{}' unsupported".format(
-                status))
-
-        # Call the status handler for the test result.
-        self.status_handlers[status](test_event)
-
-    def _common_add_testcase_entry(self, test_event, inner_content=None):
-        """Registers a testcase result, and returns the text created.
-
-        The caller is expected to manage failure/skip/success counts
-        in some kind of appropriate way.  This call simply constructs
-        the XML and appends the returned result to the self.all_results
-        list.
-
-        @param test_event the test event dictionary.
-
-        @param inner_content if specified, gets included in the 
-        inner section, at the point before stdout and stderr would be
-        included.  This is where a , , , etc.
-        could go.
-
-        @return the text of the xml testcase element.
-        """
-
-        # Get elapsed time.
-        test_class = test_event.get("test_class", "")
-        test_name = test_event.get("test_name", "")
-        event_time = test_event["event_time"]
-        time_taken = self.elapsed_time_for_test(
-            test_class, test_name, event_time)
-
-        # Plumb in stdout/stderr once we shift over to only test results.
-        test_stdout = ''
-        test_stderr = ''
-
-        # Formulate the output xml.
-        if not inner_content:
-            inner_content = ""
-        result = (
-            ''
-            '{}{}{}'.format(
-                test_class,
-                test_name,
-                time_taken,
-                inner_content,
-                test_stdout,
-                test_stderr))
-
-        # Save the result, update total test count.
-        with self.lock:
-            self.total_test_count += 1
-            self.elements["all"].append(result)
-
-        return result
-
-    def _finish_output_no_lock(self):
-        """Flushes out the report of test executions to form valid xml output.
-
-        xUnit output is in XML.  The reporting system cannot complete the
-        formatting of the output without knowing when there is no more input.
-        This call addresses notification of the completed test run and thus is
-        when we can finish off the report output.
-        """
-
-        # Figure out the counts line for the testsuite.  If we have
-        # been counting either unexpected successes or expected
-        # failures, we'll output those in the counts, at the risk of
-        # being invalidated by a validating test results viewer.
-        # These aren't counted by default so they won't show up unless
-        # the user specified a formatter option to include them.
-        xfail_count = len(self.elements["expected_failures"])
-        xpass_count = len(self.elements["unexpected_successes"])
-        if xfail_count > 0 or xpass_count > 0:
-            extra_testsuite_attributes = (
-                ' expected-failures="{}"'
-                ' unexpected-successes="{}"'.format(xfail_count, xpass_count))
-        else:
-            extra_testsuite_attributes = ""
-
-        # Output the header.
-        self.out_file.write(
-            '\n'
-            ''
-            '\n'.format(
-                self.text_encoding,
-                "LLDB test suite",
-                self.total_test_count,
-                len(self.elements["errors"]),
-                len(self.elements["failures"]),
-                len(self.elements["skips"]),
-                extra_testsuite_attributes))
-
-        # Output each of the test result entries.
-        for result in self.elements["all"]:
-            self.out_file.write(result + '\n')
-
-        # Close off the test suite.
-        self.out_file.write('\n')
-
-    def _finish_output(self):
-        """Finish writing output as all incoming events have arrived."""
-        with self.lock:
-            self._finish_output_no_lock()

From 4cdea5faf980951bf3c4cb4ade9850d27c32af16 Mon Sep 17 00:00:00 2001
From: Eric Astor <epastor@google.com>
Date: Mon, 13 Jul 2020 10:33:15 -0400
Subject: [PATCH 103/771] [ms] [llvm-ml] Improve MASM STRUCT field accessor
 support

Summary:
Adds support for several accessors:
- `[<register>.<struct name>].<field>`
- `[<register>.<struct name>.<field>].<subfield>` (where `field` has already-defined STRUCT type)
- `[<register>.<struct name>].<field>.<subfield>` (where `field` has already-defined STRUCT type)

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D83344
---
 llvm/include/llvm/MC/MCParser/MCAsmParser.h   |  8 ++-
 llvm/lib/MC/MCParser/MasmParser.cpp           | 55 ++++++++++++++-----
 .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 33 ++++++-----
 llvm/test/tools/llvm-ml/struct.test           | 50 ++++++++++++++++-
 4 files changed, 113 insertions(+), 33 deletions(-)

diff --git a/llvm/include/llvm/MC/MCParser/MCAsmParser.h b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
index 2040089759594..a68066e0f50b5 100644
--- a/llvm/include/llvm/MC/MCParser/MCAsmParser.h
+++ b/llvm/include/llvm/MC/MCParser/MCAsmParser.h
@@ -170,8 +170,12 @@ class MCAsmParser {
 
   virtual bool isParsingMasm() const { return false; }
 
-  virtual bool LookUpFieldOffset(StringRef Base, StringRef Member,
-                                 unsigned &Offset) {
+  virtual bool lookUpField(StringRef Name, StringRef &Type,
+                           unsigned &Offset) const {
+    return true;
+  }
+  virtual bool lookUpField(StringRef Base, StringRef Member, StringRef &Type,
+                           unsigned &Offset) const {
     return true;
   }
 
diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index 3dbd00aae47a2..d7d0508cabff7 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -490,8 +490,10 @@ class MasmParser : public MCAsmParser {
 
   bool isParsingMasm() const override { return true; }
 
-  bool LookUpFieldOffset(StringRef Base, StringRef Member,
-                         unsigned &Offset) override;
+  bool lookUpField(StringRef Name, StringRef &Type,
+                   unsigned &Offset) const override;
+  bool lookUpField(StringRef Base, StringRef Member, StringRef &Type,
+                   unsigned &Offset) const override;
 
   bool parseMSInlineAsm(void *AsmLoc, std::string &AsmString,
                         unsigned &NumOutputs, unsigned &NumInputs,
@@ -561,8 +563,8 @@ class MasmParser : public MCAsmParser {
   }
   static void DiagHandler(const SMDiagnostic &Diag, void *Context);
 
-  bool LookUpFieldOffset(const StructInfo &Structure, StringRef Member,
-                         unsigned &Offset);
+  bool lookUpField(const StructInfo &Structure, StringRef Member,
+                   StringRef &Type, unsigned &Offset) const;
 
   /// Should we emit DWARF describing this assembler source?  (Returns false if
   /// the source has .file directives, which means we don't want to generate
@@ -1397,12 +1399,13 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
     }
 
     // Find the field offset if used.
+    StringRef Type;
     unsigned Offset = 0;
     Split = SymbolName.split('.');
     if (!Split.second.empty()) {
       SymbolName = Split.first;
       if (Structs.count(SymbolName.lower()) &&
-          !LookUpFieldOffset(SymbolName, Split.second, Offset)) {
+          !lookUpField(SymbolName, Split.second, Type, Offset)) {
         // This is actually a reference to a field offset.
         Res = MCConstantExpr::create(Offset, getContext());
         return false;
@@ -1410,10 +1413,10 @@ bool MasmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
 
       auto TypeIt = KnownType.find(SymbolName);
       if (TypeIt == KnownType.end() ||
-          LookUpFieldOffset(*TypeIt->second, Split.second, Offset)) {
+          lookUpField(*TypeIt->second, Split.second, Type, Offset)) {
         std::pair BaseMember = Split.second.split('.');
         StringRef Base = BaseMember.first, Member = BaseMember.second;
-        LookUpFieldOffset(Base, Member, Offset);
+        lookUpField(Base, Member, Type, Offset);
       }
     }
 
@@ -6519,34 +6522,56 @@ static int rewritesSort(const AsmRewrite *AsmRewriteA,
   llvm_unreachable("Unstable rewrite sort.");
 }
 
-bool MasmParser::LookUpFieldOffset(StringRef Base, StringRef Member,
-                                   unsigned &Offset) {
+bool MasmParser::lookUpField(StringRef Name, StringRef &Type,
+                             unsigned &Offset) const {
+  const std::pair BaseMember = Name.split('.');
+  const StringRef Base = BaseMember.first, Member = BaseMember.second;
+  return lookUpField(Base, Member, Type, Offset);
+}
+
+bool MasmParser::lookUpField(StringRef Base, StringRef Member, StringRef &Type,
+                             unsigned &Offset) const {
   if (Base.empty())
     return true;
 
+  unsigned BaseOffset = 0;
+  if (Base.contains('.') && !lookUpField(Base, Type, BaseOffset))
+    Base = Type;
+
   auto TypeIt = KnownType.find(Base);
   if (TypeIt != KnownType.end())
-    return LookUpFieldOffset(*TypeIt->second, Member, Offset);
+    return lookUpField(*TypeIt->second, Member, Type, Offset);
 
   auto StructIt = Structs.find(Base.lower());
   if (StructIt != Structs.end())
-    return LookUpFieldOffset(StructIt->second, Member, Offset);
+    return lookUpField(StructIt->second, Member, Type, Offset);
 
   return true;
 }
 
-bool MasmParser::LookUpFieldOffset(const StructInfo &Structure,
-                                   StringRef Member, unsigned &Offset) {
+bool MasmParser::lookUpField(const StructInfo &Structure, StringRef Member,
+                             StringRef &Type, unsigned &Offset) const {
+  if (Member.empty()) {
+    Type = Structure.Name;
+    return false;
+  }
+
   std::pair Split = Member.split('.');
   const StringRef FieldName = Split.first, FieldMember = Split.second;
 
+  auto StructIt = Structs.find(FieldName.lower());
+  if (StructIt != Structs.end())
+    return lookUpField(StructIt->second, FieldMember, Type, Offset);
+
   auto FieldIt = Structure.FieldsByName.find(FieldName.lower());
   if (FieldIt == Structure.FieldsByName.end())
     return true;
 
   const FieldInfo &Field = Structure.Fields[FieldIt->second];
   if (FieldMember.empty()) {
-    Offset = Field.Offset;
+    Offset += Field.Offset;
+    if (Field.Contents.FT == FT_STRUCT)
+      Type = Field.Contents.StructInfo.Structure.Name;
     return false;
   }
 
@@ -6554,7 +6579,7 @@ bool MasmParser::LookUpFieldOffset(const StructInfo &Structure,
     return true;
   const StructFieldInfo &StructInfo = Field.Contents.StructInfo;
 
-  bool Result = LookUpFieldOffset(StructInfo.Structure, FieldMember, Offset);
+  bool Result = lookUpField(StructInfo.Structure, FieldMember, Type, Offset);
   if (Result)
     return true;
 
diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index 0573d4eec0599..fe09b2952f0e2 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -358,6 +358,7 @@ class X86AsmParser : public MCTargetAsmParser {
     bool MemExpr;
     bool OffsetOperator;
     SMLoc OffsetOperatorLoc;
+    StringRef CurType;
 
     bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
       if (Sym) {
@@ -385,6 +386,7 @@ class X86AsmParser : public MCTargetAsmParser {
     unsigned getScale() { return Scale; }
     const MCExpr *getSym() { return Sym; }
     StringRef getSymName() { return SymName; }
+    StringRef getType() { return CurType; }
     int64_t getImm() { return Imm + IC.execute(); }
     bool isValidEndState() {
       return State == IES_RBRAC || State == IES_INTEGER;
@@ -846,6 +848,7 @@ class X86AsmParser : public MCTargetAsmParser {
       }
       return false;
     }
+    void setType(StringRef Type) { CurType = Type; }
   };
 
   bool Error(SMLoc L, const Twine &Msg, SMRange Range = None,
@@ -1641,27 +1644,25 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
           break;
         }
         if (Parser.isParsingMasm()) {
-          const std::pair RegField =
+          const std::pair IDField =
               Tok.getString().split('.');
-          const StringRef RegName = RegField.first, Field = RegField.second;
-          SMLoc RegEndLoc =
-              SMLoc::getFromPointer(RegName.data() + RegName.size());
+          const StringRef ID = IDField.first, Field = IDField.second;
+          SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
           if (!Field.empty() &&
-              !MatchRegisterByName(Reg, RegName, IdentLoc, RegEndLoc)) {
+              !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
             if (SM.onRegister(Reg, ErrMsg))
               return Error(IdentLoc, ErrMsg);
 
+            StringRef Type;
+            unsigned Offset = 0;
             SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
-            const std::pair BaseMember = Field.split('.');
-            const StringRef Base = BaseMember.first, Member = BaseMember.second;
-
-            unsigned Offset;
-            if (Parser.LookUpFieldOffset(Base, Member, Offset))
+            if (Parser.lookUpField(Field, Type, Offset))
               return Error(FieldStartLoc, "unknown offset");
             else if (SM.onPlus(ErrMsg))
               return Error(getTok().getLoc(), ErrMsg);
             else if (SM.onInteger(Offset, ErrMsg))
               return Error(IdentLoc, ErrMsg);
+            SM.setType(Type);
 
             End = consumeToken();
             break;
@@ -1915,9 +1916,11 @@ X86AsmParser::ParseRoundingModeOp(SMLoc Start) {
 }
 
 /// Parse the '.' operator.
-bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End) {
+bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
+                                         SMLoc &End) {
   const AsmToken &Tok = getTok();
-  unsigned Offset;
+  StringRef Type;
+  unsigned Offset = 0;
 
   // Drop the optional '.'.
   StringRef DotDispStr = Tok.getString();
@@ -1933,8 +1936,9 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End)
              Tok.is(AsmToken::Identifier)) {
     const std::pair BaseMember = DotDispStr.split('.');
     const StringRef Base = BaseMember.first, Member = BaseMember.second;
-    if (getParser().LookUpFieldOffset(SM.getSymName(), DotDispStr, Offset) &&
-        getParser().LookUpFieldOffset(Base, Member, Offset) &&
+    if (getParser().lookUpField(SM.getType(), DotDispStr, Type, Offset) &&
+        getParser().lookUpField(SM.getSymName(), DotDispStr, Type, Offset) &&
+        getParser().lookUpField(DotDispStr, Type, Offset) &&
         (!SemaCallback ||
          SemaCallback->LookupInlineAsmField(Base, Member, Offset)))
       return Error(Tok.getLoc(), "Unable to lookup field reference!");
@@ -1947,6 +1951,7 @@ bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End)
   while (Tok.getLoc().getPointer() < DotExprEndLoc)
     Lex();
   SM.addImm(Offset);
+  SM.setType(Type);
   return false;
 }
 
diff --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test
index 0e60d24494555..ecd89a140371b 100644
--- a/llvm/test/tools/llvm-ml/struct.test
+++ b/llvm/test/tools/llvm-ml/struct.test
@@ -85,13 +85,11 @@ t3:
 mov eax, t2.f.h
 mov eax, [t2].f.h
 mov eax, [t2.f.h]
-mov eax, t2.FOOBAR.f.h
 
 ; CHECK: t3:
 ; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
 ; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
 ; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
-; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
 
 t4:
 mov eax, j.FOOBAR.f.h
@@ -101,4 +99,52 @@ mov eax, j.baz.b
 ; CHECK-NEXT: mov eax, dword ptr [rip + j+12]
 ; CHECK-NEXT: mov eax, dword ptr [rip + j+1]
 
+t5:
+mov eax, [ebx].FOOBAR.f.h
+mov eax, [ebx.FOOBAR].f.h
+mov eax, [ebx.FOOBAR.f.h]
+
+; CHECK: t5:
+; CHECK-NEXT: mov eax, dword ptr [ebx + 12]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 12]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 12]
+
+t6:
+mov eax, t2.FOOBAR.f.h
+mov eax, [t2].FOOBAR.f.h
+mov eax, [t2.FOOBAR].f.h
+mov eax, [t2.FOOBAR.f.h]
+
+; CHECK: t6:
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+12]
+
+t7:
+mov eax, [ebx].FOOBAR.e.b
+mov eax, [ebx.FOOBAR].e.b
+mov eax, [ebx.FOOBAR.e].b
+mov eax, [ebx.FOOBAR.e.b]
+
+; CHECK: t7:
+; CHECK-NEXT: mov eax, dword ptr [ebx + 9]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 9]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 9]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 9]
+
+t8:
+mov eax, t2.FOOBAR.e.b
+mov eax, [t2].FOOBAR.e.b
+mov eax, [t2.FOOBAR].e.b
+mov eax, [t2.FOOBAR.e].b
+mov eax, [t2.FOOBAR.e.b]
+
+; CHECK: t8:
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+9]
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+9]
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+9]
+; CHECK-NEXT: mov eax, dword ptr [rip + (t2+8)+1]
+; CHECK-NEXT: mov eax, dword ptr [rip + t2+9]
+
 END

From 7f85e9808255f3f6c2dda81d834f9de1feb827c6 Mon Sep 17 00:00:00 2001
From: Eric Astor 
Date: Mon, 13 Jul 2020 10:36:30 -0400
Subject: [PATCH 104/771] [ms] [llvm-ml] Fix MASM support for nested unnamed
 STRUCTs and UNIONs

Summary: Fix MASM support for nested unnamed STRUCTs and UNIONs

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D83345
---
 llvm/lib/MC/MCParser/MasmParser.cpp | 67 ++++++++++++++++++-----------
 llvm/test/tools/llvm-ml/struct.test | 29 +++++++++++++
 2 files changed, 71 insertions(+), 25 deletions(-)

diff --git a/llvm/lib/MC/MCParser/MasmParser.cpp b/llvm/lib/MC/MCParser/MasmParser.cpp
index d7d0508cabff7..58c22b2ccef26 100644
--- a/llvm/lib/MC/MCParser/MasmParser.cpp
+++ b/llvm/lib/MC/MCParser/MasmParser.cpp
@@ -4084,11 +4084,8 @@ bool MasmParser::parseDirectiveEnds(StringRef Name, SMLoc NameLoc) {
     return Error(NameLoc, "mismatched name in ENDS directive; expected '" +
                               StructInProgress.back().Name + "'");
   StructInfo Structure = StructInProgress.pop_back_val();
-  if (Structure.Size % Structure.Alignment != 0) {
-    // Pad to make the structure's size divisible by its alignment.
-    Structure.Size +=
-        Structure.Alignment - (Structure.Size % Structure.Alignment);
-  }
+  // Pad to make the structure's size divisible by its alignment.
+  Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
   Structs[Name.lower()] = Structure;
 
   if (parseToken(AsmToken::EndOfStatement))
@@ -4107,29 +4104,49 @@ bool MasmParser::parseDirectiveNestedEnds() {
     return addErrorSuffix(" in nested ENDS directive");
 
   StructInfo Structure = StructInProgress.pop_back_val();
-  if (Structure.Size % Structure.Alignment != 0) {
-    // Pad to make the structure's size divisible by its alignment.
-    Structure.Size +=
-        Structure.Alignment - (Structure.Size % Structure.Alignment);
-  }
-  StructInfo &ParentStruct = StructInProgress.back();
+  // Pad to make the structure's size divisible by its alignment.
+  Structure.Size = llvm::alignTo(Structure.Size, Structure.Alignment);
 
-  FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT);
-  StructFieldInfo &StructInfo = Field.Contents.StructInfo;
-  Field.Type = Structure.Size;
-  Field.LengthOf = 1;
-  Field.SizeOf = Structure.Size;
+  StructInfo &ParentStruct = StructInProgress.back();
+  if (Structure.Name.empty()) {
+    const size_t OldFields = ParentStruct.Fields.size();
+    ParentStruct.Fields.insert(
+        ParentStruct.Fields.end(),
+        std::make_move_iterator(Structure.Fields.begin()),
+        std::make_move_iterator(Structure.Fields.end()));
+    for (const auto &FieldByName : Structure.FieldsByName) {
+      ParentStruct.FieldsByName[FieldByName.getKey()] =
+          FieldByName.getValue() + OldFields;
+    }
+    if (!ParentStruct.IsUnion) {
+      for (auto FieldIter = ParentStruct.Fields.begin() + OldFields;
+           FieldIter != ParentStruct.Fields.end(); ++FieldIter) {
+        FieldIter->Offset += ParentStruct.Size;
+      }
+    }
 
-  if (ParentStruct.IsUnion)
-    ParentStruct.Size = std::max(ParentStruct.Size, Field.SizeOf);
-  else
-    ParentStruct.Size += Field.SizeOf;
+    if (ParentStruct.IsUnion)
+      ParentStruct.Size = std::max(ParentStruct.Size, Structure.Size);
+    else
+      ParentStruct.Size += Structure.Size;
+  } else {
+    FieldInfo &Field = ParentStruct.addField(Structure.Name, FT_STRUCT);
+    StructFieldInfo &StructInfo = Field.Contents.StructInfo;
+    Field.Type = Structure.Size;
+    Field.LengthOf = 1;
+    Field.SizeOf = Structure.Size;
+
+    if (ParentStruct.IsUnion)
+      ParentStruct.Size = std::max(ParentStruct.Size, Field.SizeOf);
+    else
+      ParentStruct.Size += Field.SizeOf;
 
-  StructInfo.Structure = Structure;
-  StructInfo.Initializers.emplace_back();
-  auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
-  for (const auto &SubField : Structure.Fields) {
-    FieldInitializers.push_back(SubField.Contents);
+    StructInfo.Structure = Structure;
+    StructInfo.Initializers.emplace_back();
+    auto &FieldInitializers = StructInfo.Initializers.back().FieldInitializers;
+    for (const auto &SubField : Structure.Fields) {
+      FieldInitializers.push_back(SubField.Contents);
+    }
   }
 
   return false;
diff --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test
index ecd89a140371b..7bdbf51b7114e 100644
--- a/llvm/test/tools/llvm-ml/struct.test
+++ b/llvm/test/tools/llvm-ml/struct.test
@@ -147,4 +147,33 @@ mov eax, [t2.FOOBAR.e.b]
 ; CHECK-NEXT: mov eax, dword ptr [rip + (t2+8)+1]
 ; CHECK-NEXT: mov eax, dword ptr [rip + t2+9]
 
+QUUX STRUCT
+  u DWORD ?
+  UNION
+    v WORD ?
+    w DWORD ?
+    STRUCT
+      x BYTE ?
+      y BYTE ?
+    ENDS
+  ENDS
+  z DWORD ?
+QUUX ENDS
+
+t9:
+mov eax, [ebx].QUUX.u
+mov eax, [ebx].QUUX.v
+mov eax, [ebx].QUUX.w
+mov eax, [ebx].QUUX.x
+mov eax, [ebx].QUUX.y
+mov eax, [ebx].QUUX.z
+
+; CHECK: t9:
+; CHECK-NEXT: mov eax, dword ptr [ebx]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 4]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 4]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 4]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 5]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 8]
+
 END

From 835c1f9f40ecc1a725f8f392a2a1628f634479fe Mon Sep 17 00:00:00 2001
From: Jinsong Ji 
Date: Mon, 13 Jul 2020 02:54:20 +0000
Subject: [PATCH 105/771] [compiler-rt][CMake] Pass down LLVM_LIT_ARGS in
 runtime build

We should also pass down the LLVM_LIT_ARGS in runtime build mode,
so that the runtime tests can be well controlled as well.

We already pass this down in clang/runtime/CMakeLists.txt, but not for
calls from llvm/runtime/CMakeLists.txt.

Reviewed By: phosek

Differential Revision: https://reviews.llvm.org/D83565
---
 llvm/cmake/modules/LLVMExternalProjectUtils.cmake | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
index 4eb5fad85634b..706a1ffb5c7b7 100644
--- a/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
+++ b/llvm/cmake/modules/LLVMExternalProjectUtils.cmake
@@ -250,6 +250,7 @@ function(llvm_ExternalProject_Add name source_dir)
                -DLLVM_HAVE_LINK_VERSION_SCRIPT=${LLVM_HAVE_LINK_VERSION_SCRIPT}
                -DLLVM_USE_RELATIVE_PATHS_IN_DEBUG_INFO=${LLVM_USE_RELATIVE_PATHS_IN_DEBUG_INFO}
                -DLLVM_USE_RELATIVE_PATHS_IN_FILES=${LLVM_USE_RELATIVE_PATHS_IN_FILES}
+               -DLLVM_LIT_ARGS=${LLVM_LIT_ARGS}
                -DLLVM_SOURCE_PREFIX=${LLVM_SOURCE_PREFIX}
                -DPACKAGE_VERSION=${PACKAGE_VERSION}
                -DCMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE}

From f08e8b6d7c46334ff8aa37cf8e473aa405e4fa93 Mon Sep 17 00:00:00 2001
From: Eric Astor 
Date: Mon, 13 Jul 2020 10:38:39 -0400
Subject: [PATCH 106/771] [ms] [llvm-ml] Add support for MASM STRUCT casting
 field accessors: (<TYPE> PTR <operand>).<field>

Summary:
Add support for MASM STRUCT casting field accessors: (<TYPE> PTR <operand>).<field>

Since these are operands, we add them to X86AsmParser. If/when we extend MASM support to other architectures (e.g., ARM), we will need similar changes there as well.

Reviewed By: thakis

Differential Revision: https://reviews.llvm.org/D83346
---
 .../lib/Target/X86/AsmParser/X86AsmParser.cpp | 31 ++++++++++++++++++-
 llvm/test/tools/llvm-ml/struct.test           | 16 ++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index fe09b2952f0e2..fc8813e79a3ed 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -332,6 +332,7 @@ class X86AsmParser : public MCTargetAsmParser {
     IES_PLUS,
     IES_MINUS,
     IES_OFFSET,
+    IES_CAST,
     IES_NOT,
     IES_MULTIPLY,
     IES_DIVIDE,
@@ -632,6 +633,7 @@ class X86AsmParser : public MCTargetAsmParser {
       default:
         State = IES_ERROR;
         break;
+      case IES_CAST:
       case IES_PLUS:
       case IES_MINUS:
       case IES_NOT:
@@ -744,6 +746,7 @@ class X86AsmParser : public MCTargetAsmParser {
         IC.pushOperator(IC_PLUS);
         break;
       case IES_INIT:
+      case IES_CAST:
         assert(!BracCount && "BracCount should be zero on parsing's start");
         State = IES_LBRAC;
         break;
@@ -816,6 +819,7 @@ class X86AsmParser : public MCTargetAsmParser {
       case IES_INTEGER:
       case IES_OFFSET:
       case IES_REGISTER:
+      case IES_RBRAC:
       case IES_RPAREN:
         State = IES_RPAREN;
         IC.pushOperator(IC_RPAREN);
@@ -848,6 +852,18 @@ class X86AsmParser : public MCTargetAsmParser {
       }
       return false;
     }
+    void onCast(StringRef Type) {
+      PrevState = State;
+      switch (State) {
+      default:
+        State = IES_ERROR;
+        break;
+      case IES_LPAREN:
+        setType(Type);
+        State = IES_CAST;
+        break;
+      }
+    }
     void setType(StringRef Type) { CurType = Type; }
   };
 
@@ -1635,6 +1651,18 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
       SMLoc IdentLoc = Tok.getLoc();
       StringRef Identifier = Tok.getString();
       UpdateLocLex = false;
+      // (MASM only)  PTR operator
+      if (Parser.isParsingMasm()) {
+        const AsmToken &NextTok = getLexer().peekTok();
+        if (NextTok.is(AsmToken::Identifier) &&
+            NextTok.getIdentifier().equals_lower("ptr")) {
+          SM.onCast(Identifier);
+          // eat type and ptr
+          consumeToken();
+          End = consumeToken();
+          break;
+        }
+      }
       // Register, or (MASM only) .
       unsigned Reg;
       if (Tok.is(AsmToken::Identifier)) {
@@ -1681,7 +1709,8 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
       const MCExpr *Val;
       if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
         // MS Dot Operator expression
-        if (Identifier.count('.') && PrevTK == AsmToken::RBrac) {
+        if (Identifier.count('.') &&
+            (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
           if (ParseIntelDotOperator(SM, End))
             return true;
           break;
diff --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test
index 7bdbf51b7114e..3971cfd6eb237 100644
--- a/llvm/test/tools/llvm-ml/struct.test
+++ b/llvm/test/tools/llvm-ml/struct.test
@@ -176,4 +176,20 @@ mov eax, [ebx].QUUX.z
 ; CHECK-NEXT: mov eax, dword ptr [ebx + 5]
 ; CHECK-NEXT: mov eax, dword ptr [ebx + 8]
 
+t10:
+mov eax, FOOBAR.f
+mov eax, FOOBAR.f.h
+
+; CHECK: t10:
+; CHECK-NEXT: mov eax, 10
+; CHECK-NEXT: mov eax, 12
+
+t11:
+mov eax, (FOOBAR PTR [ebx]).f
+mov eax, (FOOBAR PTR t1).f
+
+; CHECK: t11:
+; CHECK-NEXT: mov eax, dword ptr [ebx + 10]
+; CHECK-NEXT: mov eax, dword ptr [rip + t1+10]
+
 END

From 1847f4dd7570f01f70646cd5067dd0c34257cd21 Mon Sep 17 00:00:00 2001
From: Pavel Labath 
Date: Wed, 8 Jul 2020 15:43:07 +0200
Subject: [PATCH 107/771] [lldb/Utility] Rewrite Scalar::SetValueFromCString

The function's reliance on host types meant that it was needlessly
complicated, and did not handle the newer (wider) types. Rewrite it in
terms of APInt/APFloat functions to save code and improve functionality.
---
 lldb/source/Utility/Scalar.cpp        | 153 +++++++++-----------------
 lldb/unittests/Utility/ScalarTest.cpp |  14 +++
 2 files changed, 63 insertions(+), 104 deletions(-)

diff --git a/lldb/source/Utility/Scalar.cpp b/lldb/source/Utility/Scalar.cpp
index 87ac6c23892d8..6c48bbde532f9 100644
--- a/lldb/source/Utility/Scalar.cpp
+++ b/lldb/source/Utility/Scalar.cpp
@@ -24,6 +24,7 @@ using namespace lldb;
 using namespace lldb_private;
 
 using llvm::APFloat;
+using llvm::APInt;
 
 namespace {
 enum class Category { Void, Integral, Float };
@@ -1002,116 +1003,60 @@ Status Scalar::SetValueFromCString(const char *value_str, Encoding encoding,
     error.SetErrorString("Invalid encoding.");
     break;
 
-  case eEncodingUint:
-    if (byte_size <= sizeof(uint64_t)) {
-      uint64_t uval64;
-      if (!llvm::to_integer(value_str, uval64))
-        error.SetErrorStringWithFormat(
-            "'%s' is not a valid unsigned integer string value", value_str);
-      else if (!UIntValueIsValidForSize(uval64, byte_size))
-        error.SetErrorStringWithFormat(
-            "value 0x%" PRIx64 " is too large to fit in a %" PRIu64
-            " byte unsigned integer value",
-            uval64, static_cast(byte_size));
-      else {
-        m_type = Scalar::GetValueTypeForUnsignedIntegerWithByteSize(byte_size);
-        switch (m_type) {
-        case e_uint:
-          m_integer = llvm::APInt(sizeof(uint_t) * 8, uval64, false);
-          break;
-        case e_ulong:
-          m_integer = llvm::APInt(sizeof(ulong_t) * 8, uval64, false);
-          break;
-        case e_ulonglong:
-          m_integer = llvm::APInt(sizeof(ulonglong_t) * 8, uval64, false);
-          break;
-        default:
-          error.SetErrorStringWithFormat(
-              "unsupported unsigned integer byte size: %" PRIu64 "",
-              static_cast(byte_size));
-          break;
-        }
-      }
-    } else {
-      error.SetErrorStringWithFormat(
-          "unsupported unsigned integer byte size: %" PRIu64 "",
-          static_cast(byte_size));
-      return error;
-    }
-    break;
-
   case eEncodingSint:
-    if (byte_size <= sizeof(int64_t)) {
-      int64_t sval64;
-      if (!llvm::to_integer(value_str, sval64))
-        error.SetErrorStringWithFormat(
-            "'%s' is not a valid signed integer string value", value_str);
-      else if (!SIntValueIsValidForSize(sval64, byte_size))
-        error.SetErrorStringWithFormat(
-            "value 0x%" PRIx64 " is too large to fit in a %" PRIu64
-            " byte signed integer value",
-            sval64, static_cast(byte_size));
-      else {
-        m_type = Scalar::GetValueTypeForSignedIntegerWithByteSize(byte_size);
-        switch (m_type) {
-        case e_sint:
-          m_integer = llvm::APInt(sizeof(sint_t) * 8, sval64, true);
-          break;
-        case e_slong:
-          m_integer = llvm::APInt(sizeof(slong_t) * 8, sval64, true);
-          break;
-        case e_slonglong:
-          m_integer = llvm::APInt(sizeof(slonglong_t) * 8, sval64, true);
-          break;
-        default:
-          error.SetErrorStringWithFormat(
-              "unsupported signed integer byte size: %" PRIu64 "",
-              static_cast(byte_size));
-          break;
-        }
-      }
-    } else {
-      error.SetErrorStringWithFormat(
-          "unsupported signed integer byte size: %" PRIu64 "",
-          static_cast(byte_size));
-      return error;
+  case eEncodingUint: {
+    llvm::StringRef str = value_str;
+    bool is_signed = encoding == eEncodingSint;
+    bool is_negative = is_signed && str.consume_front("-");
+    APInt integer;
+    if (str.getAsInteger(0, integer)) {
+      error.SetErrorStringWithFormatv(
+          "'{0}' is not a valid integer string value", value_str);
+      break;
+    }
+    bool fits;
+    if (is_signed) {
+      integer = integer.zext(integer.getBitWidth() + 1);
+      if (is_negative)
+        integer.negate();
+      fits = integer.isSignedIntN(byte_size * 8);
+    } else
+      fits = integer.isIntN(byte_size * 8);
+    if (!fits) {
+      error.SetErrorStringWithFormatv(
+          "value {0} is too large to fit in a {1} byte integer value",
+          value_str, byte_size);
+      break;
+    }
+    m_type = GetBestTypeForBitSize(8 * byte_size, is_signed);
+    if (m_type == e_void) {
+      error.SetErrorStringWithFormatv("unsupported integer byte size: {0}",
+                                      byte_size);
+      break;
     }
+    if (is_signed)
+      m_integer = integer.sextOrTrunc(GetBitSize(m_type));
+    else
+      m_integer = integer.zextOrTrunc(GetBitSize(m_type));
     break;
+  }
 
-  case eEncodingIEEE754:
-    static float f_val;
-    static double d_val;
-    static long double l_val;
-    if (byte_size == sizeof(float)) {
-      if (::sscanf(value_str, "%f", &f_val) == 1) {
-        m_float = llvm::APFloat(f_val);
-        m_type = e_float;
-      } else
-        error.SetErrorStringWithFormat("'%s' is not a valid float string value",
-                                       value_str);
-    } else if (byte_size == sizeof(double)) {
-      if (::sscanf(value_str, "%lf", &d_val) == 1) {
-        m_float = llvm::APFloat(d_val);
-        m_type = e_double;
-      } else
-        error.SetErrorStringWithFormat("'%s' is not a valid float string value",
-                                       value_str);
-    } else if (byte_size == sizeof(long double)) {
-      if (::sscanf(value_str, "%Lf", &l_val) == 1) {
-        m_float = llvm::APFloat(
-            llvm::APFloat::x87DoubleExtended(),
-            llvm::APInt(BITWIDTH_INT128, NUM_OF_WORDS_INT128,
-                        (reinterpret_cast(&l_val))->x));
-        m_type = e_long_double;
-      } else
-        error.SetErrorStringWithFormat("'%s' is not a valid float string value",
-                                       value_str);
-    } else {
-      error.SetErrorStringWithFormat("unsupported float byte size: %" PRIu64 "",
-                                     static_cast(byte_size));
-      return error;
+  case eEncodingIEEE754: {
+    Type type = GetValueTypeForFloatWithByteSize(byte_size);
+    if (type == e_void) {
+      error.SetErrorStringWithFormatv("unsupported float byte size: {0}",
+                                      byte_size);
+      break;
     }
+    APFloat f(GetFltSemantics(type));
+    if (llvm::Expected op =
+            f.convertFromString(value_str, APFloat::rmNearestTiesToEven)) {
+      m_type = type;
+      m_float = std::move(f);
+    } else
+      error = op.takeError();
     break;
+  }
 
   case eEncodingVector:
     error.SetErrorString("vector encoding unsupported.");
diff --git a/lldb/unittests/Utility/ScalarTest.cpp b/lldb/unittests/Utility/ScalarTest.cpp
index 42a2f2aaebf2f..dd4683145b968 100644
--- a/lldb/unittests/Utility/ScalarTest.cpp
+++ b/lldb/unittests/Utility/ScalarTest.cpp
@@ -334,6 +334,20 @@ TEST(ScalarTest, SetValueFromCString) {
   EXPECT_THAT_ERROR(
       a.SetValueFromCString("-123", lldb::eEncodingUint, 8).ToError(),
       Failed());
+  EXPECT_THAT_ERROR(
+      a.SetValueFromCString("-2147483648", lldb::eEncodingSint, 4).ToError(),
+      Succeeded());
+  EXPECT_EQ(-2147483648, a);
+  EXPECT_THAT_ERROR(
+      a.SetValueFromCString("-2147483649", lldb::eEncodingSint, 4).ToError(),
+      Failed());
+  EXPECT_THAT_ERROR(
+      a.SetValueFromCString("47.25", lldb::eEncodingIEEE754, 4).ToError(),
+      Succeeded());
+  EXPECT_EQ(47.25f, a);
+  EXPECT_THAT_ERROR(
+      a.SetValueFromCString("asdf", lldb::eEncodingIEEE754, 4).ToError(),
+      Failed());
 }
 
 TEST(ScalarTest, APIntConstructor) {

From 3aabfa28086757f8469ead77c7d319302e49d3c8 Mon Sep 17 00:00:00 2001
From: Eric Astor 
Date: Mon, 13 Jul 2020 10:47:30 -0400
Subject: [PATCH 108/771] [ms] [llvm-ml] Restore omitted changes requested by
 reviewer

---
 llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 2 +-
 llvm/test/tools/llvm-ml/struct.test            | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
index fc8813e79a3ed..a3014b2aba92c 100644
--- a/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
+++ b/llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp
@@ -1657,7 +1657,7 @@ bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
         if (NextTok.is(AsmToken::Identifier) &&
             NextTok.getIdentifier().equals_lower("ptr")) {
           SM.onCast(Identifier);
-          // eat type and ptr
+          // Eat type and PTR.
           consumeToken();
           End = consumeToken();
           break;
diff --git a/llvm/test/tools/llvm-ml/struct.test b/llvm/test/tools/llvm-ml/struct.test
index 3971cfd6eb237..fa85ecd455dda 100644
--- a/llvm/test/tools/llvm-ml/struct.test
+++ b/llvm/test/tools/llvm-ml/struct.test
@@ -156,6 +156,7 @@ QUUX STRUCT
       x BYTE ?
       y BYTE ?
     ENDS
+    after_struct BYTE ?
   ENDS
   z DWORD ?
 QUUX ENDS
@@ -166,6 +167,7 @@ mov eax, [ebx].QUUX.v
 mov eax, [ebx].QUUX.w
 mov eax, [ebx].QUUX.x
 mov eax, [ebx].QUUX.y
+mov eax, [ebx].QUUX.after_struct
 mov eax, [ebx].QUUX.z
 
 ; CHECK: t9:
@@ -174,6 +176,7 @@ mov eax, [ebx].QUUX.z
 ; CHECK-NEXT: mov eax, dword ptr [ebx + 4]
 ; CHECK-NEXT: mov eax, dword ptr [ebx + 4]
 ; CHECK-NEXT: mov eax, dword ptr [ebx + 5]
+; CHECK-NEXT: mov eax, dword ptr [ebx + 4]
 ; CHECK-NEXT: mov eax, dword ptr [ebx + 8]
 
 t10:

From 7075c056e91bf95b111716a9722d10e1a61253bb Mon Sep 17 00:00:00 2001
From: Alexey Bataev 
Date: Thu, 9 Jul 2020 08:34:06 -0400
Subject: [PATCH 109/771] [OPENMP]Fix compiler crash for target data directive
 without actual target codegen.

Summary:
Need to privatize addresses of the captured variables when trying to
emit the body of the target data directive in no target codegen mode.

Reviewers: jdoerfert

Subscribers: yaxunl, guansong, cfe-commits, sstefan1, caomhin

Tags: #clang

Differential Revision: https://reviews.llvm.org/D83478
---
 clang/lib/CodeGen/CGStmtOpenMP.cpp        |  1 +
 clang/test/OpenMP/target_data_codegen.cpp | 19 +++++++++++++++++++
 2 files changed, 20 insertions(+)

diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 7135135d2a410..cfd5eda8cc80b 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -6077,6 +6077,7 @@ void CodeGenFunction::EmitOMPTargetDataDirective(
         (void)PrivateScope.Privatize();
         RCG(CGF);
       } else {
+        OMPLexicalScope Scope(CGF, S, OMPD_unknown);
         RCG(CGF);
       }
     };
diff --git a/clang/test/OpenMP/target_data_codegen.cpp b/clang/test/OpenMP/target_data_codegen.cpp
index f1c9f621bf748..f9257615ce405 100644
--- a/clang/test/OpenMP/target_data_codegen.cpp
+++ b/clang/test/OpenMP/target_data_codegen.cpp
@@ -491,4 +491,23 @@ void test_close_modifier(int arg) {
   {++arg;}
 }
 #endif
+///==========================================================================///
+// RUN: %clang_cc1 -DCK7 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefix CK7 --check-prefix CK7-64
+// RUN: %clang_cc1 -DCK7 -fopenmp -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck %s  --check-prefix CK7 --check-prefix CK7-64
+
+// RUN: %clang_cc1 -DCK7 -verify -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck --check-prefix SIMD-ONLY7 %s
+// RUN: %clang_cc1 -DCK7 -fopenmp-simd -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s
+// RUN: %clang_cc1 -fopenmp-simd -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck --check-prefix SIMD-ONLY7 %s
+// SIMD-ONLY7-NOT: {{__kmpc|__tgt}}
+#ifdef CK7
+// CK7: test_device_ptr_addr
+void test_device_ptr_addr(int arg) {
+  int *p;
+  // CK7: add nsw i32
+  // CK7: add nsw i32
+  #pragma omp target data use_device_ptr(p) use_device_addr(arg)
+  { ++arg, ++(*p); }
+}
+#endif
 #endif

From 4121172239779ea509908a2a57278a3d2206ad92 Mon Sep 17 00:00:00 2001
From: David Truby 
Date: Mon, 13 Jul 2020 16:07:22 +0100
Subject: [PATCH 110/771] [flang][openmp] libc++ unordered_map build fix in
 flang openmp static analysis

Simply move the include of unordered_map from the .cpp file to the .h file
---
 flang/lib/Semantics/check-omp-structure.cpp | 1 -
 flang/lib/Semantics/check-omp-structure.h   | 2 ++
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/flang/lib/Semantics/check-omp-structure.cpp b/flang/lib/Semantics/check-omp-structure.cpp
index a5f65bcbc8044..d857d36ed05d9 100644
--- a/flang/lib/Semantics/check-omp-structure.cpp
+++ b/flang/lib/Semantics/check-omp-structure.cpp
@@ -9,7 +9,6 @@
 #include "check-omp-structure.h"
 #include "flang/Parser/parse-tree.h"
 #include "flang/Semantics/tools.h"
-#include 
 
 namespace Fortran::semantics {
 
diff --git a/flang/lib/Semantics/check-omp-structure.h b/flang/lib/Semantics/check-omp-structure.h
index eff0eb4aa76be..7fe78a792f19b 100644
--- a/flang/lib/Semantics/check-omp-structure.h
+++ b/flang/lib/Semantics/check-omp-structure.h
@@ -19,6 +19,8 @@
 #include "flang/Semantics/semantics.h"
 #include "llvm/Frontend/OpenMP/OMPConstants.h"
 
+#include 
+
 using OmpDirectiveSet = Fortran::common::EnumSet;
 

From 4d3e8dc215d1ca99354f3c33cce87795d2b5098a Mon Sep 17 00:00:00 2001
From: Sanne Wouda 
Date: Mon, 13 Jul 2020 16:09:41 +0100
Subject: [PATCH 111/771] Fix llvm-test-suite failure introduced by
 D82550/D83122

Apparently, a call that is not an indirect call (per isIndirectCall) may still have a null getCalledFunction, so check getCalledFunction directly.
---
 llvm/include/llvm/Analysis/VectorUtils.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Analysis/VectorUtils.h b/llvm/include/llvm/Analysis/VectorUtils.h
index ce8327ae43a80..b1d7850442fba 100644
--- a/llvm/include/llvm/Analysis/VectorUtils.h
+++ b/llvm/include/llvm/Analysis/VectorUtils.h
@@ -224,7 +224,7 @@ class VFDatabase {
   /// a vector Function ABI.
   static void getVFABIMappings(const CallInst &CI,
                                SmallVectorImpl &Mappings) {
-    if (CI.isIndirectCall())
+    if (!CI.getCalledFunction())
       return;
 
     const StringRef ScalarName = CI.getCalledFunction()->getName();

From a2552f76ac6ff705434adb1d277a578445721b78 Mon Sep 17 00:00:00 2001
From: Pavel Labath 
Date: Mon, 13 Jul 2020 17:15:25 +0200
Subject: [PATCH 112/771] [ADT] Make Load(AP)IntFromMemory pointer argument
 const

The function does not modify this memory.
---
 llvm/include/llvm/ADT/APInt.h | 2 +-
 llvm/lib/Support/APInt.cpp    | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h
index c88d9651d68da..f7df648d27ed6 100644
--- a/llvm/include/llvm/ADT/APInt.h
+++ b/llvm/include/llvm/ADT/APInt.h
@@ -2286,7 +2286,7 @@ void StoreIntToMemory(const APInt &IntVal, uint8_t *Dst, unsigned StoreBytes);
 
 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
-void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes);
+void LoadIntFromMemory(APInt &IntVal, const uint8_t *Src, unsigned LoadBytes);
 
 } // namespace llvm
 
diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp
index 4a591efb141aa..9a6f93feaa29f 100644
--- a/llvm/lib/Support/APInt.cpp
+++ b/llvm/lib/Support/APInt.cpp
@@ -3086,7 +3086,8 @@ void llvm::StoreIntToMemory(const APInt &IntVal, uint8_t *Dst,
 
 /// LoadIntFromMemory - Loads the integer stored in the LoadBytes bytes starting
 /// from Src into IntVal, which is assumed to be wide enough and to hold zero.
-void llvm::LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) {
+void llvm::LoadIntFromMemory(APInt &IntVal, const uint8_t *Src,
+                             unsigned LoadBytes) {
   assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!");
   uint8_t *Dst = reinterpret_cast(
                    const_cast(IntVal.getRawData()));

From a7e9c5a39fbc7939dbeb508bb446a6ceaab2dc4b Mon Sep 17 00:00:00 2001
From: stevewan 
Date: Mon, 13 Jul 2020 11:58:22 -0400
Subject: [PATCH 113/771] [llvm-ar][test][AIX] Unsupport
 error-opening-directory.test on AIX

Summary:
The test fails on AIX as it allows open() and read() on a directory. This patch adds `# UNSUPPORTED:
system-aix` to the test to prevent it from running on AIX.

Reviewers: sameerarora101, daltenty, ShuhongL, hubert.reinterpretcast, MaskRay, smeenai, alexshap

Reviewed By: sameerarora101, hubert.reinterpretcast, MaskRay, smeenai

Subscribers: MaskRay, rupprecht, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D83579
---
 llvm/test/tools/llvm-ar/error-opening-directory.test | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/llvm/test/tools/llvm-ar/error-opening-directory.test b/llvm/test/tools/llvm-ar/error-opening-directory.test
index 1f19da7543b3d..c3068871230fc 100644
--- a/llvm/test/tools/llvm-ar/error-opening-directory.test
+++ b/llvm/test/tools/llvm-ar/error-opening-directory.test
@@ -1,6 +1,6 @@
-## Unsupported on FreeBSD as FreeBSD 12 and earlier allow reading directories
-## by default.
-# UNSUPPORTED: system-freebsd
+## Unsupported on AIX and FreeBSD as AIX and FreeBSD 12 and earlier allow
+## reading directories by default.
+# UNSUPPORTED: system-freebsd, system-aix
 
 # RUN: rm -rf %t && mkdir -p %t
 

From 8f7d3430b72e0458f0917b605cd94bcfb9396b37 Mon Sep 17 00:00:00 2001
From: Pavel Iliin 
Date: Mon, 13 Jul 2020 12:37:59 +0100
Subject: [PATCH 114/771] [ARM][NFC] More detailed vbsl checks in ARM & Thumb2
 tests.

---
 llvm/test/CodeGen/ARM/fcopysign.ll            |  96 +++-
 llvm/test/CodeGen/ARM/fp16-promote.ll         |  46 +-
 llvm/test/CodeGen/ARM/vbsl-constant.ll        | 104 +++--
 llvm/test/CodeGen/ARM/vbsl.ll                 | 205 +++++++--
 llvm/test/CodeGen/ARM/vselect_imax.ll         | 434 +++++++++++++++++-
 .../CodeGen/Thumb2/float-intrinsics-double.ll |   8 +-
 .../CodeGen/Thumb2/float-intrinsics-float.ll  |  20 +-
 7 files changed, 795 insertions(+), 118 deletions(-)

diff --git a/llvm/test/CodeGen/ARM/fcopysign.ll b/llvm/test/CodeGen/ARM/fcopysign.ll
index d013fbf8c15ac..05dbb65a6deba 100644
--- a/llvm/test/CodeGen/ARM/fcopysign.ll
+++ b/llvm/test/CodeGen/ARM/fcopysign.ll
@@ -1,40 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -disable-post-ra -mtriple=armv7-apple-darwin -mcpu=cortex-a8 | FileCheck %s -check-prefix=SOFT
 ; RUN: llc < %s -disable-post-ra -mtriple=armv7-gnueabi -float-abi=hard -mcpu=cortex-a8 | FileCheck %s -check-prefix=HARD
 
 ; rdar://8984306
 define float @test1(float %x, float %y) nounwind {
-entry:
 ; SOFT-LABEL: test1:
-; SOFT: lsr r1, r1, #31
-; SOFT: bfi r0, r1, #31, #1
-
+; SOFT:       @ %bb.0: @ %entry
+; SOFT-NEXT:    lsr r1, r1, #31
+; SOFT-NEXT:    bfi r0, r1, #31, #1
+; SOFT-NEXT:    bx lr
+;
 ; HARD-LABEL: test1:
-; HARD: vmov.i32 [[REG1:(d[0-9]+)]], #0x80000000
-; HARD: vbsl [[REG1]], d
+; HARD:       @ %bb.0: @ %entry
+; HARD-NEXT:    vmov.f32 s4, s1
+; HARD-NEXT:    @ kill: def $s0 killed $s0 def $d0
+; HARD-NEXT:    vmov.i32 d1, #0x80000000
+; HARD-NEXT:    vbsl d1, d2, d0
+; HARD-NEXT:    vmov.f32 s0, s2
+; HARD-NEXT:    bx lr
+entry:
+
   %0 = tail call float @copysignf(float %x, float %y) nounwind readnone
   ret float %0
 }
 
 define double @test2(double %x, double %y) nounwind {
-entry:
 ; SOFT-LABEL: test2:
-; SOFT: lsr r2, r3, #31
-; SOFT: bfi r1, r2, #31, #1
-
+; SOFT:       @ %bb.0: @ %entry
+; SOFT-NEXT:    lsr r2, r3, #31
+; SOFT-NEXT:    bfi r1, r2, #31, #1
+; SOFT-NEXT:    bx lr
+;
 ; HARD-LABEL: test2:
-; HARD: vmov.i32 [[REG2:(d[0-9]+)]], #0x80000000
-; HARD: vshl.i64 [[REG2]], [[REG2]], #32
-; HARD: vbsl [[REG2]], d1, d0
+; HARD:       @ %bb.0: @ %entry
+; HARD-NEXT:    vmov.i32 d16, #0x80000000
+; HARD-NEXT:    vshl.i64 d16, d16, #32
+; HARD-NEXT:    vbsl d16, d1, d0
+; HARD-NEXT:    vorr d0, d16, d16
+; HARD-NEXT:    bx lr
+entry:
+
   %0 = tail call double @copysign(double %x, double %y) nounwind readnone
   ret double %0
 }
 
 define double @test3(double %x, double %y, double %z) nounwind {
-entry:
 ; SOFT-LABEL: test3:
-; SOFT: vmov.i32 [[REG3:(d[0-9]+)]], #0x80000000
-; SOFT: vshl.i64 [[REG3]], [[REG3]], #32
-; SOFT: vbsl [[REG3]],
+; SOFT:       @ %bb.0: @ %entry
+; SOFT-NEXT:    vmov d16, r2, r3
+; SOFT-NEXT:    vmov d17, r0, r1
+; SOFT-NEXT:    vmul.f64 d16, d17, d16
+; SOFT-NEXT:    vmov.i32 d17, #0x80000000
+; SOFT-NEXT:    vshl.i64 d17, d17, #32
+; SOFT-NEXT:    vldr d18, [sp]
+; SOFT-NEXT:    vbsl d17, d18, d16
+; SOFT-NEXT:    vmov r0, r1, d17
+; SOFT-NEXT:    bx lr
+;
+; HARD-LABEL: test3:
+; HARD:       @ %bb.0: @ %entry
+; HARD-NEXT:    vmul.f64 d16, d0, d1
+; HARD-NEXT:    vmov.i32 d17, #0x80000000
+; HARD-NEXT:    vshl.i64 d0, d17, #32
+; HARD-NEXT:    vbsl d0, d2, d16
+; HARD-NEXT:    bx lr
+entry:
   %0 = fmul double %x, %y
   %1 = tail call double @copysign(double %0, double %z) nounwind readnone
   ret double %1
@@ -42,12 +72,34 @@ entry:
 
 ; rdar://9287902
 define float @test4() nounwind {
-entry:
 ; SOFT-LABEL: test4:
-; SOFT: vmov [[REG7:(d[0-9]+)]], r0, r1
-; SOFT: vmov.i32 [[REG6:(d[0-9]+)]], #0x80000000
-; SOFT: vshr.u64 [[REG7]], [[REG7]], #32
-; SOFT: vbsl [[REG6]], [[REG7]], 
+; SOFT:       @ %bb.0: @ %entry
+; SOFT-NEXT:    push {lr}
+; SOFT-NEXT:    bl _bar
+; SOFT-NEXT:    vmov d16, r0, r1
+; SOFT-NEXT:    vcvt.f32.f64 s0, d16
+; SOFT-NEXT:    vmov.i32 d17, #0x80000000
+; SOFT-NEXT:    vshr.u64 d16, d16, #32
+; SOFT-NEXT:    vmov.f32 d18, #5.000000e-01
+; SOFT-NEXT:    vbsl d17, d16, d18
+; SOFT-NEXT:    vadd.f32 d0, d0, d17
+; SOFT-NEXT:    vmov r0, s0
+; SOFT-NEXT:    pop {lr}
+;
+; HARD-LABEL: test4:
+; HARD:       @ %bb.0: @ %entry
+; HARD-NEXT:    .save {r11, lr}
+; HARD-NEXT:    push {r11, lr}
+; HARD-NEXT:    bl bar
+; HARD-NEXT:    vmov d16, r0, r1
+; HARD-NEXT:    vcvt.f32.f64 s0, d16
+; HARD-NEXT:    vmov.i32 d1, #0x80000000
+; HARD-NEXT:    vshr.u64 d16, d16, #32
+; HARD-NEXT:    vmov.f32 s4, #5.000000e-01
+; HARD-NEXT:    vbsl d1, d16, d2
+; HARD-NEXT:    vadd.f32 s0, s0, s2
+; HARD-NEXT:    pop {r11, pc}
+entry:
   %0 = tail call double (...) @bar() nounwind
   %1 = fptrunc double %0 to float
   %2 = tail call float @copysignf(float 5.000000e-01, float %1) nounwind readnone
diff --git a/llvm/test/CodeGen/ARM/fp16-promote.ll b/llvm/test/CodeGen/ARM/fp16-promote.ll
index 3cd07df671b99..11670d7b57ad8 100644
--- a/llvm/test/CodeGen/ARM/fp16-promote.ll
+++ b/llvm/test/CodeGen/ARM/fp16-promote.ll
@@ -424,7 +424,7 @@ declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
 ; CHECK-FP16: vsqrt.f32
 ; CHECK-FP16: vcvtb.f16.f32
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-VFP-LIBCALL: vsqrt.f32
+; CHECK-LIBCALL-VFP: vsqrt.f32
 ; CHECK-NOVFP: bl sqrtf
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_sqrt(half* %p) #0 {
@@ -700,18 +700,44 @@ define void @test_maximum(half* %p) #0 {
 }
 
 ; CHECK-FP16-LABEL: test_copysign:
-; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vcvtb.f32.f16
-; CHECK-FP16: vbsl
-; CHECK-FP16: vcvtb.f16.f32
+; CHECK-FP16:         ldrh r2, [r0]
+; CHECK-FP16-NEXT:    vmov.i32 d0, #0x80000000
+; CHECK-FP16-NEXT:    ldrh r1, [r1]
+; CHECK-FP16-NEXT:    vmov s2, r2
+; CHECK-FP16-NEXT:    vmov s4, r1
+; CHECK-FP16-NEXT:    vcvtb.f32.f16 s2, s2
+; CHECK-FP16-NEXT:    vcvtb.f32.f16 s4, s4
+; CHECK-FP16-NEXT:    vbsl d0, d2, d1
+; CHECK-FP16-NEXT:    vcvtb.f16.f32 s0, s0
+; CHECK-FP16-NEXT:    vmov r1, s0
+; CHECK-FP16-NEXT:    strh r1, [r0]
+; CHECK-FP16-NEXT:    bx lr
+
 ; CHECK-LIBCALL-LABEL: test_copysign:
-; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-VFP-LIBCALL: vbsl
+; CHECK-LIBCALL-VFP:         .fnstart
+; CHECK-LIBCALL-VFP-NEXT:    .save {r4, r5, r11, lr}
+; CHECK-LIBCALL-VFP-NEXT:    push {r4, r5, r11, lr}
+; CHECK-LIBCALL-VFP-NEXT:    .vsave {d8, d9}
+; CHECK-LIBCALL-VFP-NEXT:    vpush {d8, d9}
+; CHECK-LIBCALL-VFP-NEXT:    mov r5, r0
+; CHECK-LIBCALL-VFP-NEXT:    ldrh r0, [r0]
+; CHECK-LIBCALL-VFP-NEXT:    mov r4, r1
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL-VFP:         ldrh r1, [r4]
+; CHECK-LIBCALL-VFP-NEXT:    vmov s18, r0
+; CHECK-LIBCALL-VFP-NEXT:    vmov.i32 d8, #0x80000000
+; CHECK-LIBCALL-VFP-NEXT:    mov r0, r1
+; CHECK-LIBCALL: bl __aeabi_h2f
+; CHECK-LIBCALL-VFP:         vmov s0, r0
+; CHECK-LIBCALL-VFP-NEXT:    vbsl d8, d0, d9
+; CHECK-LIBCALL-VFP-NEXT:    vmov r0, s16
+; CHECK-LIBCALL: bl __aeabi_f2h
+; CHECK-LIBCALL-VFP:         strh r0, [r5]
+; CHECK-LIBCALL-VFP-NEXT:    vpop {d8, d9}
+; CHECK-LIBCALL-VFP-NEXT:    pop {r4, r5, r11, pc}
 ; CHECK-NOVFP: and
 ; CHECK-NOVFP: bic
 ; CHECK-NOVFP: orr
-; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_copysign(half* %p, half* %q) #0 {
   %a = load half, half* %p, align 2
   %b = load half, half* %q, align 2
@@ -820,7 +846,7 @@ define void @test_round(half* %p) {
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
 ; CHECK-LIBCALL: bl __aeabi_h2f
-; CHECK-VFP-LIBCALL: vmla.f32
+; CHECK-LIBCALL-VFP: vmla.f32
 ; CHECK-NOVFP: bl __aeabi_fmul
 ; CHECK-LIBCALL: bl __aeabi_f2h
 define void @test_fmuladd(half* %p, half* %q, half* %r) #0 {
diff --git a/llvm/test/CodeGen/ARM/vbsl-constant.ll b/llvm/test/CodeGen/ARM/vbsl-constant.ll
index 6bcbbc8fa878d..83b34a133dd10 100644
--- a/llvm/test/CodeGen/ARM/vbsl-constant.ll
+++ b/llvm/test/CodeGen/ARM/vbsl-constant.ll
@@ -1,10 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=arm-apple-ios -mattr=+neon | FileCheck %s
 
 define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK-LABEL: v_bsli8:
-;CHECK: vldr
-;CHECK: vldr
-;CHECK: vbsl
+; CHECK-LABEL: v_bsli8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov.i8 d16, #0x3
+; CHECK-NEXT:    vldr d17, [r2]
+; CHECK-NEXT:    vldr d18, [r0]
+; CHECK-NEXT:    vbsl d16, d18, d17
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = load <8 x i8>, <8 x i8>* %C
@@ -15,10 +20,14 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK-LABEL: v_bsli16:
-;CHECK: vldr
-;CHECK: vldr
-;CHECK: vbsl
+; CHECK-LABEL: v_bsli16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov.i16 d16, #0x3
+; CHECK-NEXT:    vldr d17, [r2]
+; CHECK-NEXT:    vldr d18, [r0]
+; CHECK-NEXT:    vbsl d16, d18, d17
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = load <4 x i16>, <4 x i16>* %C
@@ -29,10 +38,14 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK-LABEL: v_bsli32:
-;CHECK: vldr
-;CHECK: vldr
-;CHECK: vbsl
+; CHECK-LABEL: v_bsli32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vmov.i32 d16, #0x3
+; CHECK-NEXT:    vldr d17, [r2]
+; CHECK-NEXT:    vldr d18, [r0]
+; CHECK-NEXT:    vbsl d16, d18, d17
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = load <2 x i32>, <2 x i32>* %C
@@ -43,11 +56,14 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
-;CHECK-LABEL: v_bsli64:
-;CHECK: vldr
-;CHECK: vldr
-;CHECK: vldr
-;CHECK: vbsl
+; CHECK-LABEL: v_bsli64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldr d17, [r2]
+; CHECK-NEXT:    vldr d16, LCPI3_0
+; CHECK-NEXT:    vldr d18, [r0]
+; CHECK-NEXT:    vbsl d16, d18, d17
+; CHECK-NEXT:    vmov r0, r1, d16
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = load <1 x i64>, <1 x i64>* %C
@@ -58,10 +74,15 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind
 }
 
 define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
-;CHECK-LABEL: v_bslQi8:
-;CHECK: vld1.32
-;CHECK: vld1.32
-;CHECK: vbsl
+; CHECK-LABEL: v_bslQi8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vld1.32 {d16, d17}, [r2]
+; CHECK-NEXT:    vmov.i8 q9, #0x3
+; CHECK-NEXT:    vld1.32 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q9, q10, q8
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = load <16 x i8>, <16 x i8>* %C
@@ -72,10 +93,15 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
 }
 
 define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
-;CHECK-LABEL: v_bslQi16:
-;CHECK: vld1.32
-;CHECK: vld1.32
-;CHECK: vbsl
+; CHECK-LABEL: v_bslQi16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vld1.32 {d16, d17}, [r2]
+; CHECK-NEXT:    vmov.i16 q9, #0x3
+; CHECK-NEXT:    vld1.32 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q9, q10, q8
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = load <8 x i16>, <8 x i16>* %C
@@ -86,10 +112,15 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin
 }
 
 define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: v_bslQi32:
-;CHECK: vld1.32
-;CHECK: vld1.32
-;CHECK: vbsl
+; CHECK-LABEL: v_bslQi32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vld1.32 {d16, d17}, [r2]
+; CHECK-NEXT:    vmov.i32 q9, #0x3
+; CHECK-NEXT:    vld1.32 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q9, q10, q8
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = load <4 x i32>, <4 x i32>* %C
@@ -100,11 +131,16 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin
 }
 
 define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: v_bslQi64:
-;CHECK: vld1.32
-;CHECK: vld1.32
-;CHECK: vld1.64
-;CHECK: vbsl
+; CHECK-LABEL: v_bslQi64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vld1.32 {d16, d17}, [r2]
+; CHECK-NEXT:    vld1.32 {d18, d19}, [r0]
+; CHECK-NEXT:    adr r0, LCPI7_0
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r0:128]
+; CHECK-NEXT:    vbsl q10, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d20
+; CHECK-NEXT:    vmov r2, r3, d21
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = load <2 x i64>, <2 x i64>* %C
diff --git a/llvm/test/CodeGen/ARM/vbsl.ll b/llvm/test/CodeGen/ARM/vbsl.ll
index 6812dd90a1004..01e1ffb2e983a 100644
--- a/llvm/test/CodeGen/ARM/vbsl.ll
+++ b/llvm/test/CodeGen/ARM/vbsl.ll
@@ -1,10 +1,17 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s
 
 ; rdar://12471808
 
 define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
-;CHECK-LABEL: v_bsli8:
-;CHECK: vbsl
+; CHECK-LABEL: v_bsli8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldr d16, [r2]
+; CHECK-NEXT:    vldr d17, [r1]
+; CHECK-NEXT:    vldr d18, [r0]
+; CHECK-NEXT:    vbsl d18, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <8 x i8>, <8 x i8>* %A
 	%tmp2 = load <8 x i8>, <8 x i8>* %B
 	%tmp3 = load <8 x i8>, <8 x i8>* %C
@@ -16,8 +23,14 @@ define <8 x i8> @v_bsli8(<8 x i8>* %A, <8 x i8>* %B, <8 x i8>* %C) nounwind {
 }
 
 define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind {
-;CHECK-LABEL: v_bsli16:
-;CHECK: vbsl
+; CHECK-LABEL: v_bsli16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldr d16, [r2]
+; CHECK-NEXT:    vldr d17, [r1]
+; CHECK-NEXT:    vldr d18, [r0]
+; CHECK-NEXT:    vbsl d18, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <4 x i16>, <4 x i16>* %A
 	%tmp2 = load <4 x i16>, <4 x i16>* %B
 	%tmp3 = load <4 x i16>, <4 x i16>* %C
@@ -29,8 +42,14 @@ define <4 x i16> @v_bsli16(<4 x i16>* %A, <4 x i16>* %B, <4 x i16>* %C) nounwind
 }
 
 define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind {
-;CHECK-LABEL: v_bsli32:
-;CHECK: vbsl
+; CHECK-LABEL: v_bsli32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldr d16, [r2]
+; CHECK-NEXT:    vldr d17, [r1]
+; CHECK-NEXT:    vldr d18, [r0]
+; CHECK-NEXT:    vbsl d18, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <2 x i32>, <2 x i32>* %A
 	%tmp2 = load <2 x i32>, <2 x i32>* %B
 	%tmp3 = load <2 x i32>, <2 x i32>* %C
@@ -42,8 +61,14 @@ define <2 x i32> @v_bsli32(<2 x i32>* %A, <2 x i32>* %B, <2 x i32>* %C) nounwind
 }
 
 define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind {
-;CHECK-LABEL: v_bsli64:
-;CHECK: vbsl
+; CHECK-LABEL: v_bsli64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldr d16, [r2]
+; CHECK-NEXT:    vldr d17, [r1]
+; CHECK-NEXT:    vldr d18, [r0]
+; CHECK-NEXT:    vbsl d18, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <1 x i64>, <1 x i64>* %A
 	%tmp2 = load <1 x i64>, <1 x i64>* %B
 	%tmp3 = load <1 x i64>, <1 x i64>* %C
@@ -55,8 +80,15 @@ define <1 x i64> @v_bsli64(<1 x i64>* %A, <1 x i64>* %B, <1 x i64>* %C) nounwind
 }
 
 define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind {
-;CHECK-LABEL: v_bslQi8:
-;CHECK: vbsl
+; CHECK-LABEL: v_bslQi8:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q10, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d20
+; CHECK-NEXT:    vmov r2, r3, d21
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <16 x i8>, <16 x i8>* %A
 	%tmp2 = load <16 x i8>, <16 x i8>* %B
 	%tmp3 = load <16 x i8>, <16 x i8>* %C
@@ -68,8 +100,15 @@ define <16 x i8> @v_bslQi8(<16 x i8>* %A, <16 x i8>* %B, <16 x i8>* %C) nounwind
 }
 
 define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwind {
-;CHECK-LABEL: v_bslQi16:
-;CHECK: vbsl
+; CHECK-LABEL: v_bslQi16:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q10, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d20
+; CHECK-NEXT:    vmov r2, r3, d21
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <8 x i16>, <8 x i16>* %A
 	%tmp2 = load <8 x i16>, <8 x i16>* %B
 	%tmp3 = load <8 x i16>, <8 x i16>* %C
@@ -81,8 +120,15 @@ define <8 x i16> @v_bslQi16(<8 x i16>* %A, <8 x i16>* %B, <8 x i16>* %C) nounwin
 }
 
 define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwind {
-;CHECK-LABEL: v_bslQi32:
-;CHECK: vbsl
+; CHECK-LABEL: v_bslQi32:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q10, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d20
+; CHECK-NEXT:    vmov r2, r3, d21
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <4 x i32>, <4 x i32>* %A
 	%tmp2 = load <4 x i32>, <4 x i32>* %B
 	%tmp3 = load <4 x i32>, <4 x i32>* %C
@@ -94,8 +140,15 @@ define <4 x i32> @v_bslQi32(<4 x i32>* %A, <4 x i32>* %B, <4 x i32>* %C) nounwin
 }
 
 define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwind {
-;CHECK-LABEL: v_bslQi64:
-;CHECK: vbsl
+; CHECK-LABEL: v_bslQi64:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q10, q9, q8
+; CHECK-NEXT:    vmov r0, r1, d20
+; CHECK-NEXT:    vmov r2, r3, d21
+; CHECK-NEXT:    mov pc, lr
 	%tmp1 = load <2 x i64>, <2 x i64>* %A
 	%tmp2 = load <2 x i64>, <2 x i64>* %B
 	%tmp3 = load <2 x i64>, <2 x i64>* %C
@@ -108,84 +161,180 @@ define <2 x i64> @v_bslQi64(<2 x i64>* %A, <2 x i64>* %B, <2 x i64>* %C) nounwin
 
 define <8 x i8> @f1(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: f1:
-; CHECK: vbsl
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldr d16, [sp]
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    vbsl d18, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    mov pc, lr
   %vbsl.i = tail call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> %a, <8 x i8> %b, <8 x i8> %c) nounwind
   ret <8 x i8> %vbsl.i
 }
 
 define <4 x i16> @f2(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: f2:
-; CHECK: vbsl
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldr d16, [sp]
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    vbsl d18, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    mov pc, lr
   %vbsl3.i = tail call <4 x i16> @llvm.arm.neon.vbsl.v4i16(<4 x i16> %a, <4 x i16> %b, <4 x i16> %c) nounwind
   ret <4 x i16> %vbsl3.i
 }
 
 define <2 x i32> @f3(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: f3:
-; CHECK: vbsl
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldr d16, [sp]
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    vbsl d18, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    mov pc, lr
   %vbsl3.i = tail call <2 x i32> @llvm.arm.neon.vbsl.v2i32(<2 x i32> %a, <2 x i32> %b, <2 x i32> %c) nounwind
   ret <2 x i32> %vbsl3.i
 }
 
 define <2 x float> @f4(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: f4:
-; CHECK: vbsl
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldr d16, [sp]
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    vbsl d18, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    mov pc, lr
   %vbsl4.i = tail call <2 x float> @llvm.arm.neon.vbsl.v2f32(<2 x float> %a, <2 x float> %b, <2 x float> %c) nounwind
   ret <2 x float> %vbsl4.i
 }
 
 define <16 x i8> @g1(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: g1:
-; CHECK: vbsl
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    add r12, sp, #16
+; CHECK-NEXT:    vmov d19, r2, r3
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q9, q10, q8
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    mov pc, lr
   %vbsl.i = tail call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> %a, <16 x i8> %b, <16 x i8> %c) nounwind
   ret <16 x i8> %vbsl.i
 }
 
 define <8 x i16> @g2(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: g2:
-; CHECK: vbsl
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    add r12, sp, #16
+; CHECK-NEXT:    vmov d19, r2, r3
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q9, q10, q8
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    mov pc, lr
   %vbsl3.i = tail call <8 x i16> @llvm.arm.neon.vbsl.v8i16(<8 x i16> %a, <8 x i16> %b, <8 x i16> %c) nounwind
   ret <8 x i16> %vbsl3.i
 }
 
 define <4 x i32> @g3(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: g3:
-; CHECK: vbsl
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    add r12, sp, #16
+; CHECK-NEXT:    vmov d19, r2, r3
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q9, q10, q8
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    mov pc, lr
   %vbsl3.i = tail call <4 x i32> @llvm.arm.neon.vbsl.v4i32(<4 x i32> %a, <4 x i32> %b, <4 x i32> %c) nounwind
   ret <4 x i32> %vbsl3.i
 }
 
 define <4 x float> @g4(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: g4:
-; CHECK: vbsl
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    add r12, sp, #16
+; CHECK-NEXT:    vmov d19, r2, r3
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q9, q10, q8
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    mov pc, lr
   %vbsl4.i = tail call <4 x float> @llvm.arm.neon.vbsl.v4f32(<4 x float> %a, <4 x float> %b, <4 x float> %c) nounwind
   ret <4 x float> %vbsl4.i
 }
 
 define <1 x i64> @test_vbsl_s64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: test_vbsl_s64:
-; CHECK: vbsl d
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldr d16, [sp]
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    vbsl d18, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    mov pc, lr
   %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
   ret <1 x i64> %vbsl3.i
 }
 
 define <1 x i64> @test_vbsl_u64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: test_vbsl_u64:
-; CHECK: vbsl d
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    vldr d16, [sp]
+; CHECK-NEXT:    vmov d17, r2, r3
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    vbsl d18, d17, d16
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    mov pc, lr
   %vbsl3.i = tail call <1 x i64> @llvm.arm.neon.vbsl.v1i64(<1 x i64> %a, <1 x i64> %b, <1 x i64> %c) nounwind
   ret <1 x i64> %vbsl3.i
 }
 
 define <2 x i64> @test_vbslq_s64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: test_vbslq_s64:
-; CHECK: vbsl q
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    add r12, sp, #16
+; CHECK-NEXT:    vmov d19, r2, r3
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q9, q10, q8
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    mov pc, lr
   %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
   ret <2 x i64> %vbsl3.i
 }
 
 define <2 x i64> @test_vbslq_u64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind readnone optsize ssp {
 ; CHECK-LABEL: test_vbslq_u64:
-; CHECK: vbsl q
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    add r12, sp, #16
+; CHECK-NEXT:    vmov d19, r2, r3
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r12]
+; CHECK-NEXT:    vmov d18, r0, r1
+; CHECK-NEXT:    mov r0, sp
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r0]
+; CHECK-NEXT:    vbsl q9, q10, q8
+; CHECK-NEXT:    vmov r0, r1, d18
+; CHECK-NEXT:    vmov r2, r3, d19
+; CHECK-NEXT:    mov pc, lr
   %vbsl3.i = tail call <2 x i64> @llvm.arm.neon.vbsl.v2i64(<2 x i64> %a, <2 x i64> %b, <2 x i64> %c) nounwind
   ret <2 x i64> %vbsl3.i
 }
diff --git a/llvm/test/CodeGen/ARM/vselect_imax.ll b/llvm/test/CodeGen/ARM/vselect_imax.ll
index e212b37fa1f5f..f9d88cc4af982 100644
--- a/llvm/test/CodeGen/ARM/vselect_imax.ll
+++ b/llvm/test/CodeGen/ARM/vselect_imax.ll
@@ -63,11 +63,66 @@ define void @func_blend15(%T0_15* %loadaddr, %T0_15* %loadaddr2,
 ; lowering we also need to adjust the cost.
 %T0_18 = type <4 x i64>
 %T1_18 = type <4 x i1>
-; CHECK-LABEL: func_blend18:
 define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
                            %T1_18* %blend, %T0_18* %storeaddr) {
-; CHECK: vbsl
-; CHECK: vbsl
+; CHECK-LABEL: func_blend18:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT:    vld1.64 {d22, d23}, [r0:128]!
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r1:128]!
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r1:128]
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r0:128]
+; CHECK-NEXT:    vmov.32 r12, d16[0]
+; CHECK-NEXT:    vmov.32 r2, d20[0]
+; CHECK-NEXT:    vmov.32 lr, d16[1]
+; CHECK-NEXT:    vmov.32 r0, d20[1]
+; CHECK-NEXT:    vmov.32 r7, d18[0]
+; CHECK-NEXT:    vmov.32 r5, d22[0]
+; CHECK-NEXT:    vmov.32 r4, d22[1]
+; CHECK-NEXT:    vmov.32 r6, d17[0]
+; CHECK-NEXT:    subs r2, r2, r12
+; CHECK-NEXT:    vmov.32 r2, d18[1]
+; CHECK-NEXT:    sbcs r0, r0, lr
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    subs r7, r5, r7
+; CHECK-NEXT:    vmov.32 r7, d21[0]
+; CHECK-NEXT:    vmov.32 r5, d17[1]
+; CHECK-NEXT:    sbcs r2, r4, r2
+; CHECK-NEXT:    vmov.32 r4, d21[1]
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    movlt r2, #1
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    mvnne r2, #0
+; CHECK-NEXT:    subs r7, r7, r6
+; CHECK-NEXT:    vmov.32 r6, d23[0]
+; CHECK-NEXT:    vmov.32 r7, d19[0]
+; CHECK-NEXT:    sbcs r5, r4, r5
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    movlt r4, #1
+; CHECK-NEXT:    vmov.32 r5, d19[1]
+; CHECK-NEXT:    subs r7, r6, r7
+; CHECK-NEXT:    vmov.32 r7, d23[1]
+; CHECK-NEXT:    sbcs r7, r7, r5
+; CHECK-NEXT:    movlt r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    mvnne r1, #0
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    vdup.32 d25, r1
+; CHECK-NEXT:    mvnne r4, #0
+; CHECK-NEXT:    vdup.32 d24, r2
+; CHECK-NEXT:    vdup.32 d27, r4
+; CHECK-NEXT:    vbsl q12, q11, q9
+; CHECK-NEXT:    vdup.32 d26, r0
+; CHECK-NEXT:    vbsl q13, q10, q8
+; CHECK-NEXT:    vst1.64 {d24, d25}, [r3:128]!
+; CHECK-NEXT:    vst1.64 {d26, d27}, [r3:128]
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r11, lr}
+; CHECK-NEXT:    mov pc, lr
   %v0 = load %T0_18, %T0_18* %loadaddr
   %v1 = load %T0_18, %T0_18* %loadaddr2
   %c = icmp slt %T0_18 %v0, %v1
@@ -79,13 +134,126 @@ define void @func_blend18(%T0_18* %loadaddr, %T0_18* %loadaddr2,
 }
 %T0_19 = type <8 x i64>
 %T1_19 = type <8 x i1>
-; CHECK-LABEL: func_blend19:
 define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
                            %T1_19* %blend, %T0_19* %storeaddr) {
-; CHECK: vbsl
-; CHECK: vbsl
-; CHECK: vbsl
-; CHECK: vbsl
+; CHECK-LABEL: func_blend19:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, lr}
+; CHECK-NEXT:    push {r4, r5, r6, lr}
+; CHECK-NEXT:    mov r12, r1
+; CHECK-NEXT:    mov r2, r0
+; CHECK-NEXT:    vld1.64 {d24, d25}, [r12:128]!
+; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    mov lr, #0
+; CHECK-NEXT:    vld1.64 {d28, d29}, [r2:128]!
+; CHECK-NEXT:    vld1.64 {d16, d17}, [r12:128]
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r2:128]
+; CHECK-NEXT:    add r2, r1, #32
+; CHECK-NEXT:    add r1, r1, #48
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r2:128]
+; CHECK-NEXT:    add r2, r0, #32
+; CHECK-NEXT:    add r0, r0, #48
+; CHECK-NEXT:    vld1.64 {d30, d31}, [r2:128]
+; CHECK-NEXT:    vmov.32 r4, d16[0]
+; CHECK-NEXT:    vmov.32 r2, d18[0]
+; CHECK-NEXT:    vmov.32 r12, d16[1]
+; CHECK-NEXT:    vmov.32 r5, d18[1]
+; CHECK-NEXT:    vld1.64 {d22, d23}, [r1:128]
+; CHECK-NEXT:    vmov.32 r1, d21[0]
+; CHECK-NEXT:    vld1.64 {d26, d27}, [r0:128]
+; CHECK-NEXT:    vmov.32 r0, d21[1]
+; CHECK-NEXT:    subs r2, r2, r4
+; CHECK-NEXT:    vmov.32 r4, d31[1]
+; CHECK-NEXT:    vmov.32 r2, d31[0]
+; CHECK-NEXT:    sbcs r5, r5, r12
+; CHECK-NEXT:    mov r12, #0
+; CHECK-NEXT:    movlt r12, #1
+; CHECK-NEXT:    cmp r12, #0
+; CHECK-NEXT:    mvnne r12, #0
+; CHECK-NEXT:    vmov.32 r5, d25[0]
+; CHECK-NEXT:    subs r1, r2, r1
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    sbcs r0, r4, r0
+; CHECK-NEXT:    vmov.32 r1, d29[0]
+; CHECK-NEXT:    vmov.32 r0, d25[1]
+; CHECK-NEXT:    movlt r2, #1
+; CHECK-NEXT:    vmov.32 r4, d29[1]
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    mvnne r2, #0
+; CHECK-NEXT:    vdup.32 d5, r2
+; CHECK-NEXT:    subs r1, r1, r5
+; CHECK-NEXT:    vmov.32 r5, d24[1]
+; CHECK-NEXT:    vmov.32 r1, d24[0]
+; CHECK-NEXT:    sbcs r0, r4, r0
+; CHECK-NEXT:    vmov.32 r4, d28[0]
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d1, r0
+; CHECK-NEXT:    vmov.32 r0, d19[0]
+; CHECK-NEXT:    subs r1, r4, r1
+; CHECK-NEXT:    vmov.32 r4, d17[0]
+; CHECK-NEXT:    vmov.32 r1, d28[1]
+; CHECK-NEXT:    sbcs r1, r1, r5
+; CHECK-NEXT:    vmov.32 r5, d17[1]
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    movlt r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    mvnne r1, #0
+; CHECK-NEXT:    subs r0, r0, r4
+; CHECK-NEXT:    vmov.32 r0, d19[1]
+; CHECK-NEXT:    vmov.32 r4, d22[0]
+; CHECK-NEXT:    vdup.32 d0, r1
+; CHECK-NEXT:    vmov.32 r1, d22[1]
+; CHECK-NEXT:    vbsl q0, q14, q12
+; CHECK-NEXT:    sbcs r0, r0, r5
+; CHECK-NEXT:    vmov.32 r5, d26[0]
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    subs r4, r5, r4
+; CHECK-NEXT:    vmov.32 r5, d20[0]
+; CHECK-NEXT:    vmov.32 r4, d26[1]
+; CHECK-NEXT:    sbcs r1, r4, r1
+; CHECK-NEXT:    vmov.32 r4, d30[0]
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    movlt r1, #1
+; CHECK-NEXT:    subs r4, r4, r5
+; CHECK-NEXT:    vmov.32 r5, d30[1]
+; CHECK-NEXT:    vmov.32 r4, d20[1]
+; CHECK-NEXT:    sbcs r4, r5, r4
+; CHECK-NEXT:    vmov.32 r5, d27[0]
+; CHECK-NEXT:    vmov.32 r4, d23[0]
+; CHECK-NEXT:    movlt r6, #1
+; CHECK-NEXT:    subs r4, r5, r4
+; CHECK-NEXT:    vmov.32 r5, d27[1]
+; CHECK-NEXT:    vmov.32 r4, d23[1]
+; CHECK-NEXT:    sbcs r4, r5, r4
+; CHECK-NEXT:    movlt lr, #1
+; CHECK-NEXT:    cmp lr, #0
+; CHECK-NEXT:    mvnne lr, #0
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    mvnne r6, #0
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    mvnne r1, #0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    vdup.32 d4, r6
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d3, lr
+; CHECK-NEXT:    vbsl q2, q15, q10
+; CHECK-NEXT:    vdup.32 d21, r0
+; CHECK-NEXT:    add r0, r3, #32
+; CHECK-NEXT:    vdup.32 d2, r1
+; CHECK-NEXT:    vdup.32 d20, r12
+; CHECK-NEXT:    vbsl q1, q13, q11
+; CHECK-NEXT:    vst1.64 {d4, d5}, [r0:128]
+; CHECK-NEXT:    add r0, r3, #48
+; CHECK-NEXT:    vbsl q10, q9, q8
+; CHECK-NEXT:    vst1.64 {d0, d1}, [r3:128]!
+; CHECK-NEXT:    vst1.64 {d2, d3}, [r0:128]
+; CHECK-NEXT:    vst1.64 {d20, d21}, [r3:128]
+; CHECK-NEXT:    pop {r4, r5, r6, lr}
+; CHECK-NEXT:    mov pc, lr
   %v0 = load %T0_19, %T0_19* %loadaddr
   %v1 = load %T0_19, %T0_19* %loadaddr2
   %c = icmp slt %T0_19 %v0, %v1
@@ -97,17 +265,249 @@ define void @func_blend19(%T0_19* %loadaddr, %T0_19* %loadaddr2,
 }
 %T0_20 = type <16 x i64>
 %T1_20 = type <16 x i1>
-; CHECK-LABEL: func_blend20:
 define void @func_blend20(%T0_20* %loadaddr, %T0_20* %loadaddr2,
                            %T1_20* %blend, %T0_20* %storeaddr) {
-; CHECK: vbsl
-; CHECK: vbsl
-; CHECK: vbsl
-; CHECK: vbsl
-; CHECK: vbsl
-; CHECK: vbsl
-; CHECK: vbsl
-; CHECK: vbsl
+; CHECK-LABEL: func_blend20:
+; CHECK:       @ %bb.0:
+; CHECK-NEXT:    .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    push {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    .pad #4
+; CHECK-NEXT:    sub sp, sp, #4
+; CHECK-NEXT:    .vsave {d8, d9, d10, d11}
+; CHECK-NEXT:    vpush {d8, d9, d10, d11}
+; CHECK-NEXT:    .pad #8
+; CHECK-NEXT:    sub sp, sp, #8
+; CHECK-NEXT:    add r9, r1, #64
+; CHECK-NEXT:    mov r2, #32
+; CHECK-NEXT:    add r8, r0, #64
+; CHECK-NEXT:    vld1.64 {d18, d19}, [r9:128], r2
+; CHECK-NEXT:    mov r10, #0
+; CHECK-NEXT:    vld1.64 {d22, d23}, [r8:128], r2
+; CHECK-NEXT:    vmov.32 r2, d19[0]
+; CHECK-NEXT:    str r3, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT:    vmov.32 r7, d23[0]
+; CHECK-NEXT:    mov r3, #0
+; CHECK-NEXT:    vmov.32 r5, d19[1]
+; CHECK-NEXT:    vmov.32 r6, d23[1]
+; CHECK-NEXT:    vld1.64 {d2, d3}, [r9:128]!
+; CHECK-NEXT:    vmov.32 r12, d2[0]
+; CHECK-NEXT:    subs r2, r7, r2
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r7:128]!
+; CHECK-NEXT:    sbcs r2, r6, r5
+; CHECK-NEXT:    vmov.32 r5, d18[0]
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    vmov.32 r6, d22[0]
+; CHECK-NEXT:    movlt r2, #1
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    vld1.64 {d0, d1}, [r7:128]
+; CHECK-NEXT:    mvnne r2, #0
+; CHECK-NEXT:    vdup.32 d17, r2
+; CHECK-NEXT:    mov r2, r0
+; CHECK-NEXT:    subs r5, r6, r5
+; CHECK-NEXT:    vmov.32 r6, d22[1]
+; CHECK-NEXT:    vmov.32 r5, d18[1]
+; CHECK-NEXT:    sbcs r5, r6, r5
+; CHECK-NEXT:    mov r5, #0
+; CHECK-NEXT:    movlt r5, #1
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    mvnne r5, #0
+; CHECK-NEXT:    vdup.32 d16, r5
+; CHECK-NEXT:    vbsl q8, q11, q9
+; CHECK-NEXT:    vld1.64 {d22, d23}, [r2:128]!
+; CHECK-NEXT:    vmov.32 r5, d21[0]
+; CHECK-NEXT:    vmov.32 r6, d23[0]
+; CHECK-NEXT:    vld1.64 {d30, d31}, [r2:128]
+; CHECK-NEXT:    vmov.32 r2, d1[0]
+; CHECK-NEXT:    vmov.32 r7, d30[0]
+; CHECK-NEXT:    subs r5, r6, r5
+; CHECK-NEXT:    vmov.32 r6, d23[1]
+; CHECK-NEXT:    vmov.32 r5, d21[1]
+; CHECK-NEXT:    sbcs r5, r6, r5
+; CHECK-NEXT:    vmov.32 r6, d22[0]
+; CHECK-NEXT:    mov r5, #0
+; CHECK-NEXT:    movlt r5, #1
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    mvnne r5, #0
+; CHECK-NEXT:    vdup.32 d19, r5
+; CHECK-NEXT:    vmov.32 r5, d20[0]
+; CHECK-NEXT:    subs r5, r6, r5
+; CHECK-NEXT:    vmov.32 r6, d22[1]
+; CHECK-NEXT:    vmov.32 r5, d20[1]
+; CHECK-NEXT:    sbcs r5, r6, r5
+; CHECK-NEXT:    mov r5, #0
+; CHECK-NEXT:    movlt r5, #1
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    mvnne r5, #0
+; CHECK-NEXT:    vdup.32 d18, r5
+; CHECK-NEXT:    add r5, r0, #32
+; CHECK-NEXT:    vbsl q9, q11, q10
+; CHECK-NEXT:    vld1.64 {d22, d23}, [r5:128]
+; CHECK-NEXT:    add r5, r1, #32
+; CHECK-NEXT:    vld1.64 {d24, d25}, [r5:128]
+; CHECK-NEXT:    vmov.32 r5, d24[0]
+; CHECK-NEXT:    vmov.32 r6, d22[0]
+; CHECK-NEXT:    vmov.32 r4, d23[0]
+; CHECK-NEXT:    vld1.64 {d20, d21}, [r8:128]!
+; CHECK-NEXT:    vmov.32 r11, d21[0]
+; CHECK-NEXT:    subs r5, r6, r5
+; CHECK-NEXT:    vmov.32 r6, d22[1]
+; CHECK-NEXT:    vmov.32 r5, d24[1]
+; CHECK-NEXT:    sbcs r5, r6, r5
+; CHECK-NEXT:    vmov.32 r6, d25[0]
+; CHECK-NEXT:    movlt r10, #1
+; CHECK-NEXT:    cmp r10, #0
+; CHECK-NEXT:    mvnne r10, #0
+; CHECK-NEXT:    subs r4, r4, r6
+; CHECK-NEXT:    vmov.32 r6, d23[1]
+; CHECK-NEXT:    vmov.32 r4, d25[1]
+; CHECK-NEXT:    sbcs r4, r6, r4
+; CHECK-NEXT:    mov r6, #0
+; CHECK-NEXT:    vmov.32 r4, d31[0]
+; CHECK-NEXT:    movlt r6, #1
+; CHECK-NEXT:    cmp r6, #0
+; CHECK-NEXT:    mvnne r6, #0
+; CHECK-NEXT:    subs r2, r4, r2
+; CHECK-NEXT:    vmov.32 r4, d31[1]
+; CHECK-NEXT:    vmov.32 r2, d1[1]
+; CHECK-NEXT:    sbcs r2, r4, r2
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    movlt r2, #1
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    mvnne r2, #0
+; CHECK-NEXT:    vdup.32 d27, r2
+; CHECK-NEXT:    add r2, r0, #48
+; CHECK-NEXT:    vld1.64 {d4, d5}, [r2:128]
+; CHECK-NEXT:    add r2, r1, #48
+; CHECK-NEXT:    add r0, r0, #80
+; CHECK-NEXT:    add r1, r1, #80
+; CHECK-NEXT:    vld1.64 {d6, d7}, [r2:128]
+; CHECK-NEXT:    vmov.32 r2, d7[0]
+; CHECK-NEXT:    vmov.32 r4, d5[0]
+; CHECK-NEXT:    vmov.32 r5, d4[0]
+; CHECK-NEXT:    vld1.64 {d8, d9}, [r0:128]
+; CHECK-NEXT:    subs r2, r4, r2
+; CHECK-NEXT:    vmov.32 r4, d5[1]
+; CHECK-NEXT:    vmov.32 r2, d7[1]
+; CHECK-NEXT:    sbcs r2, r4, r2
+; CHECK-NEXT:    vmov.32 r4, d0[0]
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    movlt r2, #1
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    mvnne r2, #0
+; CHECK-NEXT:    vdup.32 d29, r2
+; CHECK-NEXT:    vmov.32 r2, d6[1]
+; CHECK-NEXT:    subs r4, r7, r4
+; CHECK-NEXT:    vmov.32 r7, d30[1]
+; CHECK-NEXT:    vmov.32 r4, d0[1]
+; CHECK-NEXT:    sbcs r4, r7, r4
+; CHECK-NEXT:    vmov.32 r7, d4[1]
+; CHECK-NEXT:    mov r4, #0
+; CHECK-NEXT:    movlt r4, #1
+; CHECK-NEXT:    cmp r4, #0
+; CHECK-NEXT:    mvnne r4, #0
+; CHECK-NEXT:    vdup.32 d26, r4
+; CHECK-NEXT:    vmov.32 r4, d6[0]
+; CHECK-NEXT:    vbsl q13, q15, q0
+; CHECK-NEXT:    vld1.64 {d0, d1}, [r9:128]
+; CHECK-NEXT:    vdup.32 d31, r6
+; CHECK-NEXT:    vmov.32 r9, d3[0]
+; CHECK-NEXT:    vdup.32 d30, r10
+; CHECK-NEXT:    vmov.32 r10, d21[1]
+; CHECK-NEXT:    vbsl q15, q11, q12
+; CHECK-NEXT:    subs r4, r5, r4
+; CHECK-NEXT:    sbcs r2, r7, r2
+; CHECK-NEXT:    vmov.32 r4, d0[1]
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    movlt r2, #1
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    mvnne r2, #0
+; CHECK-NEXT:    vdup.32 d28, r2
+; CHECK-NEXT:    vbsl q14, q2, q3
+; CHECK-NEXT:    vld1.64 {d4, d5}, [r8:128]
+; CHECK-NEXT:    vmov.32 r2, d0[0]
+; CHECK-NEXT:    vmov.32 r6, d4[0]
+; CHECK-NEXT:    vmov.32 r5, d4[1]
+; CHECK-NEXT:    vld1.64 {d6, d7}, [r1:128]
+; CHECK-NEXT:    vmov.32 r7, d7[0]
+; CHECK-NEXT:    vmov.32 r1, d7[1]
+; CHECK-NEXT:    vmov.32 lr, d5[0]
+; CHECK-NEXT:    vmov.32 r8, d3[1]
+; CHECK-NEXT:    subs r0, r6, r2
+; CHECK-NEXT:    vmov.32 r2, d9[1]
+; CHECK-NEXT:    sbcs r0, r5, r4
+; CHECK-NEXT:    vmov.32 r4, d9[0]
+; CHECK-NEXT:    movlt r3, #1
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    mvnne r3, #0
+; CHECK-NEXT:    vmov.32 r6, d8[1]
+; CHECK-NEXT:    mov r5, #0
+; CHECK-NEXT:    vmov.32 r0, d5[1]
+; CHECK-NEXT:    subs r4, r4, r7
+; CHECK-NEXT:    vmov.32 r7, d2[1]
+; CHECK-NEXT:    sbcs r1, r2, r1
+; CHECK-NEXT:    vmov.32 r2, d8[0]
+; CHECK-NEXT:    vmov.32 r1, d6[0]
+; CHECK-NEXT:    movlt r5, #1
+; CHECK-NEXT:    vmov.32 r4, d6[1]
+; CHECK-NEXT:    cmp r5, #0
+; CHECK-NEXT:    mvnne r5, #0
+; CHECK-NEXT:    vdup.32 d11, r5
+; CHECK-NEXT:    vmov.32 r5, d20[0]
+; CHECK-NEXT:    subs r1, r2, r1
+; CHECK-NEXT:    vmov.32 r2, d1[0]
+; CHECK-NEXT:    sbcs r1, r6, r4
+; CHECK-NEXT:    vmov.32 r6, d1[1]
+; CHECK-NEXT:    vmov.32 r4, d20[1]
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    movlt r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    mvnne r1, #0
+; CHECK-NEXT:    vdup.32 d10, r1
+; CHECK-NEXT:    mov r1, #0
+; CHECK-NEXT:    vbsl q5, q4, q3
+; CHECK-NEXT:    subs r2, lr, r2
+; CHECK-NEXT:    sbcs r0, r0, r6
+; CHECK-NEXT:    mov r0, #0
+; CHECK-NEXT:    movlt r0, #1
+; CHECK-NEXT:    subs r2, r5, r12
+; CHECK-NEXT:    sbcs r2, r4, r7
+; CHECK-NEXT:    mov r2, #0
+; CHECK-NEXT:    movlt r2, #1
+; CHECK-NEXT:    subs r7, r11, r9
+; CHECK-NEXT:    sbcs r7, r10, r8
+; CHECK-NEXT:    movlt r1, #1
+; CHECK-NEXT:    cmp r1, #0
+; CHECK-NEXT:    mvnne r1, #0
+; CHECK-NEXT:    cmp r2, #0
+; CHECK-NEXT:    vdup.32 d23, r1
+; CHECK-NEXT:    mvnne r2, #0
+; CHECK-NEXT:    cmp r0, #0
+; CHECK-NEXT:    ldr r1, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    mvnne r0, #0
+; CHECK-NEXT:    vdup.32 d22, r2
+; CHECK-NEXT:    vdup.32 d25, r0
+; CHECK-NEXT:    add r0, r1, #80
+; CHECK-NEXT:    vbsl q11, q10, q1
+; CHECK-NEXT:    vdup.32 d24, r3
+; CHECK-NEXT:    vst1.64 {d10, d11}, [r0:128]
+; CHECK-NEXT:    add r0, r1, #32
+; CHECK-NEXT:    vbsl q12, q2, q0
+; CHECK-NEXT:    vst1.64 {d30, d31}, [r0:128]
+; CHECK-NEXT:    add r0, r1, #48
+; CHECK-NEXT:    vst1.64 {d28, d29}, [r0:128]
+; CHECK-NEXT:    add r0, r1, #64
+; CHECK-NEXT:    vst1.64 {d18, d19}, [r1:128]!
+; CHECK-NEXT:    vst1.64 {d26, d27}, [r1:128]
+; CHECK-NEXT:    mov r1, #32
+; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128], r1
+; CHECK-NEXT:    vst1.64 {d22, d23}, [r0:128]!
+; CHECK-NEXT:    vst1.64 {d24, d25}, [r0:128]
+; CHECK-NEXT:    add sp, sp, #8
+; CHECK-NEXT:    vpop {d8, d9, d10, d11}
+; CHECK-NEXT:    add sp, sp, #4
+; CHECK-NEXT:    pop {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT:    mov pc, lr
   %v0 = load %T0_20, %T0_20* %loadaddr
   %v1 = load %T0_20, %T0_20* %loadaddr2
   %c = icmp slt %T0_20 %v0, %v1
diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
index acafde53ac830..611a9c1500d82 100644
--- a/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
+++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-double.ll
@@ -127,9 +127,11 @@ define double @copysign_d(double %a, double %b) {
 ; SOFT: bfi r1, [[REG]], #31, #1
 ; VFP: lsrs [[REG:r[0-9]+]], r3, #31
 ; VFP: bfi r1, [[REG]], #31, #1
-; NEON: vmov.i32 [[REG:d[0-9]+]], #0x80000000
-; NEON: vshl.i64 [[REG]], [[REG]], #32
-; NEON: vbsl [[REG]], d
+; NEON:         vmov.i32 d16, #0x80000000
+; NEON-NEXT:    vshl.i64 d16, d16, #32
+; NEON-NEXT:    vbsl d16, d1, d0
+; NEON-NEXT:    vorr d0, d16, d16
+; NEON-NEXT:    bx lr
   %1 = call double @llvm.copysign.f64(double %a, double %b)
   ret double %1
 }
diff --git a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
index 1263ae15b4664..5e8276f071159 100644
--- a/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
+++ b/llvm/test/CodeGen/Thumb2/float-intrinsics-float.ll
@@ -3,8 +3,8 @@
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m33                   | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=NO-VMLA
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=VFP  -check-prefix=FP-ARMv8  -check-prefix=VMLA
 ; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-m7 -mattr=-fp64 | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=SP -check-prefix=FP-ARMv8 -check-prefix=VMLA
-; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=VFP4 -check-prefix=NO-VMLA
-; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a57                   | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON -check-prefix=FP-ARMv8 -check-prefix=VMLA
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a7                    | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON-A7 -check-prefix=VFP4 -check-prefix=NO-VMLA
+; RUN: llc < %s -mtriple=thumbv7-none-eabihf -mcpu=cortex-a57                   | FileCheck %s -check-prefix=CHECK -check-prefix=HARD -check-prefix=DP -check-prefix=NEON-A57 -check-prefix=FP-ARMv8 -check-prefix=VMLA
 
 declare float     @llvm.sqrt.f32(float %Val)
 define float @sqrt_f(float %a) {
@@ -123,8 +123,20 @@ define float @copysign_f(float %a, float %b) {
 ; SP: bfi r{{[0-9]+}}, [[REG]], #31, #1
 ; VFP: lsrs [[REG:r[0-9]+]], r{{[0-9]+}}, #31
 ; VFP: bfi r{{[0-9]+}}, [[REG]], #31, #1
-; NEON: vmov.i32 [[REG:d[0-9]+]], #0x80000000
-; NEON: vbsl [[REG]], d
+; NEON-A7:       @ %bb.0:
+; NEON-A7-NEXT:    vmov.f32 s4, s1
+; NEON-A7-NEXT:    @ kill: def $s0 killed $s0 def $d0
+; NEON-A7-NEXT:    vmov.i32 d1, #0x80000000
+; NEON-A7-NEXT:    vbsl d1, d2, d0
+; NEON-A7-NEXT:    vmov.f32 s0, s2
+; NEON-A7-NEXT:    bx lr
+; NEON-A57:       @ %bb.0:
+; NEON-A57-NEXT:    vmov.f32 s4, s1
+; NEON-A57-NEXT:    vmov.i32 d1, #0x80000000
+; NEON-A57-NEXT:    @ kill: def $s0 killed $s0 def $d0
+; NEON-A57-NEXT:    vbsl d1, d2, d0
+; NEON-A57-NEXT:    vmov.f32 s0, s2
+; NEON-A57-NEXT:    bx lr
   %1 = call float @llvm.copysign.f32(float %a, float %b)
   ret float %1
 }

From 69fff1fc498fe9bcb1ac6d0aa01bd825860e32f3 Mon Sep 17 00:00:00 2001
From: Sanjay Patel 
Date: Mon, 13 Jul 2020 11:23:45 -0400
Subject: [PATCH 115/771] [x86] add tests for bswap/rotate; NFC

---
 llvm/test/CodeGen/X86/rot16.ll | 72 +++++++++++++++++++++++++++++++++-
 1 file changed, 70 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/X86/rot16.ll b/llvm/test/CodeGen/X86/rot16.ll
index 5a1a8da3c677b..f0cd650624c6f 100644
--- a/llvm/test/CodeGen/X86/rot16.ll
+++ b/llvm/test/CodeGen/X86/rot16.ll
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686--                | FileCheck %s --check-prefixes=X32,BASE32
+; RUN: llc < %s -mtriple=i686--  -mattr=movbe  | FileCheck %s --check-prefixes=X32,MOVBE32
+; RUN: llc < %s -mtriple=x86_64--              | FileCheck %s --check-prefixes=X64,BASE64
+; RUN: llc < %s -mtriple=x86_64-- -mattr=movbe | FileCheck %s --check-prefixes=X64,MOVBE64
 
 define i16 @foo(i16 %x, i16 %y, i16 %z) nounwind {
 ; X32-LABEL: foo:
@@ -230,3 +232,69 @@ define i16 @rot16_trunc(i32 %x, i32 %y) nounwind {
 	%t3 = trunc i32 %t2 to i16
 	ret i16 %t3
 }
+
+define i16 @rotate16(i16 %x) {
+; X32-LABEL: rotate16:
+; X32:       # %bb.0:
+; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    rolw $8, %ax
+; X32-NEXT:    retl
+;
+; X64-LABEL: rotate16:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    rolw $8, %ax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %r = call i16 @llvm.fshl.i16(i16 %x, i16 %x, i16 8)
+  ret i16 %r
+}
+
+define void @rotate16_in_place_memory(i8* %p) {
+; X32-LABEL: rotate16_in_place_memory:
+; X32:       # %bb.0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    rolw $8, (%eax)
+; X32-NEXT:    retl
+;
+; X64-LABEL: rotate16_in_place_memory:
+; X64:       # %bb.0:
+; X64-NEXT:    rolw $8, (%rdi)
+; X64-NEXT:    retq
+  %p0 = getelementptr i8, i8* %p, i64 0
+  %p1 = getelementptr i8, i8* %p, i64 1
+  %i0 = load i8, i8* %p0, align 1
+  %i1 = load i8, i8* %p1, align 1
+  store i8 %i1, i8* %p0, align 1
+  store i8 %i0, i8* %p1, align 1
+  ret void
+}
+
+define void @rotate16_memory(i8* %p, i8* %q) {
+; X32-LABEL: rotate16_memory:
+; X32:       # %bb.0:
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT:    movzwl (%ecx), %ecx
+; X32-NEXT:    rolw $8, %cx
+; X32-NEXT:    movw %cx, (%eax)
+; X32-NEXT:    retl
+;
+; X64-LABEL: rotate16_memory:
+; X64:       # %bb.0:
+; X64-NEXT:    movzwl (%rdi), %eax
+; X64-NEXT:    rolw $8, %ax
+; X64-NEXT:    movw %ax, (%rsi)
+; X64-NEXT:    retq
+  %p0 = getelementptr i8, i8* %p, i64 0
+  %p1 = getelementptr i8, i8* %p, i64 1
+  %q0 = getelementptr i8, i8* %q, i64 0
+  %q1 = getelementptr i8, i8* %q, i64 1
+  %i0 = load i8, i8* %p0, align 1
+  %i1 = load i8, i8* %p1, align 1
+  store i8 %i1, i8* %q0, align 1
+  store i8 %i0, i8* %q1, align 1
+  ret void
+}
+
+declare i16 @llvm.fshl.i16(i16, i16, i16)

From 8779b114109dd1461a410e2357f2505665e6efec Mon Sep 17 00:00:00 2001
From: Sanjay Patel 
Date: Mon, 13 Jul 2020 11:35:38 -0400
Subject: [PATCH 116/771] [DAGCombiner] rot i16 X, 8 --> bswap X

We have this generic transform in IR (instcombine),
but as shown in PR41098:
http://bugs.llvm.org/PR41098
...the pattern may emerge in codegen too.

x86 has a potential refinement/reversal opportunity here,
but that should come later or needs a target hook to
avoid the transform. Converting to bswap is the more
specific form, so we should use it if it is available.
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  6 ++
 llvm/test/CodeGen/X86/rot16.ll                | 90 +++++++++++++------
 2 files changed, 68 insertions(+), 28 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index dd601bd5ca7e8..0dde1d0918e13 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7694,6 +7694,12 @@ SDValue DAGCombiner::visitRotate(SDNode *N) {
       return DAG.getNode(N->getOpcode(), dl, VT, N0, Amt);
   }
 
+  // rot i16 X, 8 --> bswap X
+  auto *RotAmtC = isConstOrConstSplat(N1);
+  if (RotAmtC && RotAmtC->getAPIntValue() == 8 &&
+      VT.getScalarSizeInBits() == 16 && hasOperation(ISD::BSWAP, VT))
+    return DAG.getNode(ISD::BSWAP, dl, VT, N0);
+
   // Simplify the operands using demanded-bits information.
   if (SimplifyDemandedBits(SDValue(N, 0)))
     return SDValue(N, 0);
diff --git a/llvm/test/CodeGen/X86/rot16.ll b/llvm/test/CodeGen/X86/rot16.ll
index f0cd650624c6f..a6adb42242dda 100644
--- a/llvm/test/CodeGen/X86/rot16.ll
+++ b/llvm/test/CodeGen/X86/rot16.ll
@@ -234,11 +234,16 @@ define i16 @rot16_trunc(i32 %x, i32 %y) nounwind {
 }
 
 define i16 @rotate16(i16 %x) {
-; X32-LABEL: rotate16:
-; X32:       # %bb.0:
-; X32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    rolw $8, %ax
-; X32-NEXT:    retl
+; BASE32-LABEL: rotate16:
+; BASE32:       # %bb.0:
+; BASE32-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
+; BASE32-NEXT:    rolw $8, %ax
+; BASE32-NEXT:    retl
+;
+; MOVBE32-LABEL: rotate16:
+; MOVBE32:       # %bb.0:
+; MOVBE32-NEXT:    movbew {{[0-9]+}}(%esp), %ax
+; MOVBE32-NEXT:    retl
 ;
 ; X64-LABEL: rotate16:
 ; X64:       # %bb.0:
@@ -250,17 +255,32 @@ define i16 @rotate16(i16 %x) {
   ret i16 %r
 }
 
+; TODO: Should this always be rolw with memory operand?
+
 define void @rotate16_in_place_memory(i8* %p) {
-; X32-LABEL: rotate16_in_place_memory:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    rolw $8, (%eax)
-; X32-NEXT:    retl
+; BASE32-LABEL: rotate16_in_place_memory:
+; BASE32:       # %bb.0:
+; BASE32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; BASE32-NEXT:    rolw $8, (%eax)
+; BASE32-NEXT:    retl
 ;
-; X64-LABEL: rotate16_in_place_memory:
-; X64:       # %bb.0:
-; X64-NEXT:    rolw $8, (%rdi)
-; X64-NEXT:    retq
+; MOVBE32-LABEL: rotate16_in_place_memory:
+; MOVBE32:       # %bb.0:
+; MOVBE32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; MOVBE32-NEXT:    movzwl (%eax), %ecx
+; MOVBE32-NEXT:    movbew %cx, (%eax)
+; MOVBE32-NEXT:    retl
+;
+; BASE64-LABEL: rotate16_in_place_memory:
+; BASE64:       # %bb.0:
+; BASE64-NEXT:    rolw $8, (%rdi)
+; BASE64-NEXT:    retq
+;
+; MOVBE64-LABEL: rotate16_in_place_memory:
+; MOVBE64:       # %bb.0:
+; MOVBE64-NEXT:    movzwl (%rdi), %eax
+; MOVBE64-NEXT:    movbew %ax, (%rdi)
+; MOVBE64-NEXT:    retq
   %p0 = getelementptr i8, i8* %p, i64 0
   %p1 = getelementptr i8, i8* %p, i64 1
   %i0 = load i8, i8* %p0, align 1
@@ -271,21 +291,35 @@ define void @rotate16_in_place_memory(i8* %p) {
 }
 
 define void @rotate16_memory(i8* %p, i8* %q) {
-; X32-LABEL: rotate16_memory:
-; X32:       # %bb.0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X32-NEXT:    movzwl (%ecx), %ecx
-; X32-NEXT:    rolw $8, %cx
-; X32-NEXT:    movw %cx, (%eax)
-; X32-NEXT:    retl
+; BASE32-LABEL: rotate16_memory:
+; BASE32:       # %bb.0:
+; BASE32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; BASE32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; BASE32-NEXT:    movzwl (%ecx), %ecx
+; BASE32-NEXT:    rolw $8, %cx
+; BASE32-NEXT:    movw %cx, (%eax)
+; BASE32-NEXT:    retl
 ;
-; X64-LABEL: rotate16_memory:
-; X64:       # %bb.0:
-; X64-NEXT:    movzwl (%rdi), %eax
-; X64-NEXT:    rolw $8, %ax
-; X64-NEXT:    movw %ax, (%rsi)
-; X64-NEXT:    retq
+; MOVBE32-LABEL: rotate16_memory:
+; MOVBE32:       # %bb.0:
+; MOVBE32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; MOVBE32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; MOVBE32-NEXT:    movzwl (%ecx), %ecx
+; MOVBE32-NEXT:    movbew %cx, (%eax)
+; MOVBE32-NEXT:    retl
+;
+; BASE64-LABEL: rotate16_memory:
+; BASE64:       # %bb.0:
+; BASE64-NEXT:    movzwl (%rdi), %eax
+; BASE64-NEXT:    rolw $8, %ax
+; BASE64-NEXT:    movw %ax, (%rsi)
+; BASE64-NEXT:    retq
+;
+; MOVBE64-LABEL: rotate16_memory:
+; MOVBE64:       # %bb.0:
+; MOVBE64-NEXT:    movzwl (%rdi), %eax
+; MOVBE64-NEXT:    movbew %ax, (%rsi)
+; MOVBE64-NEXT:    retq
   %p0 = getelementptr i8, i8* %p, i64 0
   %p1 = getelementptr i8, i8* %p, i64 1
   %q0 = getelementptr i8, i8* %q, i64 0

From 153a0b8906d57f07f6711c2cfd10361616987038 Mon Sep 17 00:00:00 2001
From: Hiroshi Yamauchi 
Date: Tue, 7 Jul 2020 10:19:54 -0700
Subject: [PATCH 117/771] [PGO][PGSO] Add profile guided size optimization to
 the X86 LEA fixup.

Differential Revision: https://reviews.llvm.org/D83330
---
 llvm/lib/Target/X86/X86FixupLEAs.cpp        | 19 +++++-
 llvm/lib/Target/X86/X86PadShortFunction.cpp |  1 +
 llvm/test/CodeGen/X86/fixup-lea.ll          | 74 +++++++--------------
 3 files changed, 42 insertions(+), 52 deletions(-)

diff --git a/llvm/lib/Target/X86/X86FixupLEAs.cpp b/llvm/lib/Target/X86/X86FixupLEAs.cpp
index 9ac401bb02537..4242790389219 100644
--- a/llvm/lib/Target/X86/X86FixupLEAs.cpp
+++ b/llvm/lib/Target/X86/X86FixupLEAs.cpp
@@ -16,8 +16,11 @@
 #include "X86InstrInfo.h"
 #include "X86Subtarget.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/ProfileSummaryInfo.h"
+#include "llvm/CodeGen/LazyMachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineSizeOpts.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/TargetSchedule.h"
 #include "llvm/Support/Debug.h"
@@ -111,6 +114,12 @@ class FixupLEAPass : public MachineFunctionPass {
         MachineFunctionProperties::Property::NoVRegs);
   }
 
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.addRequired();
+    AU.addRequired();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
 private:
   TargetSchedModel TSM;
   const X86InstrInfo *TII = nullptr;
@@ -205,21 +214,27 @@ bool FixupLEAPass::runOnMachineFunction(MachineFunction &MF) {
   TSM.init(&ST);
   TII = ST.getInstrInfo();
   TRI = ST.getRegisterInfo();
+  auto *PSI = &getAnalysis().getPSI();
+  auto *MBFI = (PSI && PSI->hasProfileSummary())
+                   ? &getAnalysis().getBFI()
+                   : nullptr;
 
   LLVM_DEBUG(dbgs() << "Start X86FixupLEAs\n";);
   for (MachineBasicBlock &MBB : MF) {
     // First pass. Try to remove or optimize existing LEAs.
+    bool OptIncDecPerBB =
+        OptIncDec || llvm::shouldOptimizeForSize(&MBB, PSI, MBFI);
     for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I) {
       if (!isLEA(I->getOpcode()))
         continue;
 
-      if (optTwoAddrLEA(I, MBB, OptIncDec, UseLEAForSP))
+      if (optTwoAddrLEA(I, MBB, OptIncDecPerBB, UseLEAForSP))
         continue;
 
       if (IsSlowLEA)
         processInstructionForSlowLEA(I, MBB);
       else if (IsSlow3OpsLEA)
-        processInstrForSlow3OpLEA(I, MBB, OptIncDec);
+        processInstrForSlow3OpLEA(I, MBB, OptIncDecPerBB);
     }
 
     // Second pass for creating LEAs. This may reverse some of the
diff --git a/llvm/lib/Target/X86/X86PadShortFunction.cpp b/llvm/lib/Target/X86/X86PadShortFunction.cpp
index 4c6bd0ccc2cd2..ec81b07f9e5f0 100644
--- a/llvm/lib/Target/X86/X86PadShortFunction.cpp
+++ b/llvm/lib/Target/X86/X86PadShortFunction.cpp
@@ -58,6 +58,7 @@ namespace {
     void getAnalysisUsage(AnalysisUsage &AU) const override {
       AU.addRequired();
       AU.addRequired();
+      AU.addPreserved();
       MachineFunctionPass::getAnalysisUsage(AU);
     }
 
diff --git a/llvm/test/CodeGen/X86/fixup-lea.ll b/llvm/test/CodeGen/X86/fixup-lea.ll
index 3f661a8e991e9..35c3976408f64 100644
--- a/llvm/test/CodeGen/X86/fixup-lea.ll
+++ b/llvm/test/CodeGen/X86/fixup-lea.ll
@@ -109,31 +109,18 @@ for.end:
 }
 
 define void @foo_pgso(i32 inreg %dns) !prof !14 {
-; SLOW-LABEL: foo_pgso:
-; SLOW:       # %bb.0: # %entry
-; SLOW-NEXT:    xorl %ecx, %ecx
-; SLOW-NEXT:    decl %ecx
-; SLOW-NEXT:  .LBB4_1: # %for.body
-; SLOW-NEXT:    # =>This Inner Loop Header: Depth=1
-; SLOW-NEXT:    movzwl %cx, %edx
-; SLOW-NEXT:    decl %ecx
-; SLOW-NEXT:    cmpl %eax, %edx
-; SLOW-NEXT:    jl .LBB4_1
-; SLOW-NEXT:  # %bb.2: # %for.end
-; SLOW-NEXT:    retl
-;
-; FAST-LABEL: foo_pgso:
-; FAST:       # %bb.0: # %entry
-; FAST-NEXT:    xorl %ecx, %ecx
-; FAST-NEXT:    decl %ecx
-; FAST-NEXT:  .LBB4_1: # %for.body
-; FAST-NEXT:    # =>This Inner Loop Header: Depth=1
-; FAST-NEXT:    movzwl %cx, %edx
-; FAST-NEXT:    addl $-1, %ecx
-; FAST-NEXT:    cmpl %eax, %edx
-; FAST-NEXT:    jl .LBB4_1
-; FAST-NEXT:  # %bb.2: # %for.end
-; FAST-NEXT:    retl
+; CHECK-LABEL: foo_pgso:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    decl %ecx
+; CHECK-NEXT:  .LBB4_1: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movzwl %cx, %edx
+; CHECK-NEXT:    decl %ecx
+; CHECK-NEXT:    cmpl %eax, %edx
+; CHECK-NEXT:    jl .LBB4_1
+; CHECK-NEXT:  # %bb.2: # %for.end
+; CHECK-NEXT:    retl
 entry:
   br label %for.body
 
@@ -149,31 +136,18 @@ for.end:
 }
 
 define void @bar_pgso(i32 inreg %dns) !prof !14 {
-; SLOW-LABEL: bar_pgso:
-; SLOW:       # %bb.0: # %entry
-; SLOW-NEXT:    xorl %ecx, %ecx
-; SLOW-NEXT:    incl %ecx
-; SLOW-NEXT:  .LBB5_1: # %for.body
-; SLOW-NEXT:    # =>This Inner Loop Header: Depth=1
-; SLOW-NEXT:    movzwl %cx, %edx
-; SLOW-NEXT:    incl %ecx
-; SLOW-NEXT:    cmpl %eax, %edx
-; SLOW-NEXT:    jl .LBB5_1
-; SLOW-NEXT:  # %bb.2: # %for.end
-; SLOW-NEXT:    retl
-;
-; FAST-LABEL: bar_pgso:
-; FAST:       # %bb.0: # %entry
-; FAST-NEXT:    xorl %ecx, %ecx
-; FAST-NEXT:    incl %ecx
-; FAST-NEXT:  .LBB5_1: # %for.body
-; FAST-NEXT:    # =>This Inner Loop Header: Depth=1
-; FAST-NEXT:    movzwl %cx, %edx
-; FAST-NEXT:    addl $1, %ecx
-; FAST-NEXT:    cmpl %eax, %edx
-; FAST-NEXT:    jl .LBB5_1
-; FAST-NEXT:  # %bb.2: # %for.end
-; FAST-NEXT:    retl
+; CHECK-LABEL: bar_pgso:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    xorl %ecx, %ecx
+; CHECK-NEXT:    incl %ecx
+; CHECK-NEXT:  .LBB5_1: # %for.body
+; CHECK-NEXT:    # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT:    movzwl %cx, %edx
+; CHECK-NEXT:    incl %ecx
+; CHECK-NEXT:    cmpl %eax, %edx
+; CHECK-NEXT:    jl .LBB5_1
+; CHECK-NEXT:  # %bb.2: # %for.end
+; CHECK-NEXT:    retl
 entry:
   br label %for.body
 

From 83080a294ad7d145d758821bcf4354ad0cb7d299 Mon Sep 17 00:00:00 2001
From: Mircea Trofin 
Date: Mon, 29 Jun 2020 11:37:06 -0700
Subject: [PATCH 118/771] [llvm] Native size estimator for training -Oz inliner

Summary:
This is an experimental ML-based native size estimator, necessary for
computing partial rewards during -Oz inliner policy training. Data
extraction for model training will be provided in a separate patch.

RFC: http://lists.llvm.org/pipermail/llvm-dev/2020-April/140763.html

Reviewers: davidxl, jdoerfert

Subscribers: mgorny, hiraditya, mgrang, arphaman, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D82817
---
 llvm/CMakeLists.txt                           |    12 +
 .../Analysis/InlineSizeEstimatorAnalysis.h    |    35 +
 llvm/include/llvm/Analysis/Utils/TFUtils.h    |   136 +
 llvm/lib/Analysis/CMakeLists.txt              |    40 +-
 .../Analysis/InlineSizeEstimatorAnalysis.cpp  |   299 +
 llvm/lib/Analysis/TFUtils.cpp                 |   143 +
 llvm/lib/Passes/PassBuilder.cpp               |     1 +
 llvm/lib/Passes/PassRegistry.def              |     1 +
 llvm/unittests/Analysis/CMakeLists.txt        |    12 +-
 .../InlineSizeEstimatorAnalysisTest.cpp       |   101 +
 .../ir2native_x86_64_model/saved_model.pbtxt  | 10596 ++++++++++++++++
 .../variables/variables.data-00000-of-00001   |   Bin 0 -> 88424 bytes
 .../variables/variables.index                 |   Bin 0 -> 398 bytes
 llvm/unittests/Analysis/TFUtilsTest.cpp       |    98 +
 14 files changed, 11464 insertions(+), 10 deletions(-)
 create mode 100644 llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h
 create mode 100644 llvm/include/llvm/Analysis/Utils/TFUtils.h
 create mode 100644 llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
 create mode 100644 llvm/lib/Analysis/TFUtils.cpp
 create mode 100644 llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp
 create mode 100644 llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt
 create mode 100644 llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001
 create mode 100644 llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index
 create mode 100644 llvm/unittests/Analysis/TFUtilsTest.cpp

diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt
index de2887b64c2a9..4e14e61fcacd6 100644
--- a/llvm/CMakeLists.txt
+++ b/llvm/CMakeLists.txt
@@ -981,6 +981,18 @@ if (NOT TENSORFLOW_AOT_PATH STREQUAL "")
     ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/tf_runtime)
 endif()
 
+set(TENSORFLOW_C_LIB_PATH "" CACHE PATH "Path to TensorFlow C library install")
+find_library(tensorflow_c_api tensorflow PATHS ${TENSORFLOW_C_LIB_PATH}/lib)
+
+# Similar to the above Tensorflow dependency, please refer to the same script.
+# In this case, the latest C API library is available for download from
+# https://www.tensorflow.org/install/lang_c
+if (tensorflow_c_api)
+  set(LLVM_HAVE_TF_API "ON" CACHE BOOL "Full Tensorflow API available")
+  add_definitions("-DLLVM_HAVE_TF_API")
+  include_directories(${TENSORFLOW_C_LIB_PATH}/include)
+endif()
+
 # Put this before tblgen. Else we have a circular dependence.
 add_subdirectory(lib/Demangle)
 add_subdirectory(lib/Support)
diff --git a/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h
new file mode 100644
index 0000000000000..29a6f59146748
--- /dev/null
+++ b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h
@@ -0,0 +1,35 @@
+//===- InlineSizeEstimatorAnalysis.h - ML size estimator --------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+
+#ifndef LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
+#define LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+class Function;
+
+class TFModelEvaluator;
+class InlineSizeEstimatorAnalysis
+    : public AnalysisInfoMixin {
+public:
+  InlineSizeEstimatorAnalysis();
+  InlineSizeEstimatorAnalysis(InlineSizeEstimatorAnalysis &&);
+  ~InlineSizeEstimatorAnalysis();
+
+  static AnalysisKey Key;
+  using Result = Optional;
+  Result run(const Function &F, FunctionAnalysisManager &FAM);
+  static bool isEvaluatorRequested();
+
+private:
+  std::unique_ptr Evaluator;
+};
+} // namespace llvm
+#endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H
\ No newline at end of file
diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h
new file mode 100644
index 0000000000000..a1d7108b149ff
--- /dev/null
+++ b/llvm/include/llvm/Analysis/Utils/TFUtils.h
@@ -0,0 +1,136 @@
+//===- TFUtils.h - utilities for tensorflow C API ---------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+#ifndef LLVM_ANALYSIS_UTILS_TFUTILS_H
+#define LLVM_ANALYSIS_UTILS_TFUTILS_H
+
+#include "tensorflow/c/c_api.h"
+#include "llvm/IR/LLVMContext.h"
+
+#include <memory>
+#include <vector>
+
+namespace llvm {
+
+/// Load a SavedModel, find the given inputs and outputs, and setup storage
+/// for input tensors. The user is responsible for correctly dimensioning the
+/// input tensors and setting their values before calling evaluate().
+/// To initialize:
+/// - construct the object
+/// - initialize the input tensors using initInput. Indices must correspond to
+///   indices in the InputNames used at construction.
+/// To use:
+/// - set input values by using getInput to get each input tensor, and then
+///   setting internal scalars, for all dimensions (tensors are row-major:
+///   https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/c/c_api.h#L205)
+/// - prepare an output vector of TF_Output* type, with the correct number of
+/// outputs (i.e. same as OutputNames). Initialize the vector with nullptr
+/// values.
+/// - call evaluate. The input tensors' values are not consumed after this, and
+///   may still be read.
+/// - use the outputs in the output vector
+/// - deallocate each output tensor in the output vector, using TF_DeleteTensor.
+class TFModelEvaluator final {
+public:
+  /// The result of a model evaluation. Handles the lifetime of the output
+  /// TF_Tensor objects, which means that their values need to be used before
+  /// the EvaluationResult's dtor is called.
+  class EvaluationResult {
+  public:
+    ~EvaluationResult() {
+      for (auto *P : Output)
+        if (P)
+          TF_DeleteTensor(P);
+    }
+
+    EvaluationResult(const EvaluationResult &) = delete;
+    EvaluationResult(EvaluationResult &&Other)
+        : OutputSize(Other.OutputSize), Output(std::move(Other.Output)) {
+      Other.Output.clear();
+    };
+
+    /// Get a pointer to the first element of the tensor at Index.
+    template <typename T> T *getTensorValue(size_t Index) {
+      return static_cast<T *>(TF_TensorData(Output[Index]));
+    }
+
+  private:
+    friend class TFModelEvaluator;
+    EvaluationResult(size_t OutputSize)
+        : OutputSize(OutputSize), Output(OutputSize){};
+
+    const size_t OutputSize;
+    std::vector<TF_Tensor *> Output;
+  };
+
+  using TFGraphPtr = std::unique_ptr<TF_Graph, decltype(&TF_DeleteGraph)>;
+  using TFSessionOptionsPtr =
+      std::unique_ptr<TF_SessionOptions, decltype(&TF_DeleteSessionOptions)>;
+  using TFStatusPtr = std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)>;
+
+  TFModelEvaluator(StringRef SavedModelPath,
+                   const std::vector<std::string> &InputNames,
+                   const std::vector<std::string> &OutputNames,
+                   const char *Tags = "serve");
+  ~TFModelEvaluator();
+  TFModelEvaluator(const TFModelEvaluator &) = delete;
+  TFModelEvaluator(TFModelEvaluator &&) = delete;
+
+  /// Evaluate the model, assuming it is valid. Returns None if the evaluation
+  /// fails or the model is invalid, or an EvaluationResult otherwise. The
+  /// inputs are assumed to have been already provided via getInput(). When
+  /// returning None, it also marks the object invalid. Pass an Output vector
+  /// with the same size as OutputNames, but with nullptr values. evaluate()
+  /// will populate it with tensors, matching in index the corresponding
+  /// OutputNames. The caller is responsible for the deallocation of those
+  /// tensors, using TF_DeleteTensor.
+  Optional<EvaluationResult> evaluate();
+
+  /// Provides access to the input vector. It is already dimensioned correctly,
+  /// but the values need to be allocated by the user.
+  std::vector<TF_Tensor *> &getInput() { return Input; }
+
+  /// Returns true if the tensorflow model was loaded successfully, false
+  /// otherwise.
+  bool isValid() const { return !!Session; }
+
+  /// Initialize the input at Index as a tensor of the given type and dimensions
+  void initInput(int Index, TF_DataType Type,
+                 const std::vector<int64_t> &Dimensions);
+
+private:
+  /// The objects necessary for carrying out an evaluation of the SavedModel.
+  /// They are expensive to set up, and we maintain them across all the
+  /// evaluations of the model.
+  TF_Session *Session = nullptr;
+  TFGraphPtr Graph;
+  TFSessionOptionsPtr Options;
+
+  /// The specification of the input nodes.
+  std::vector<TF_Output> InputFeed;
+
+  /// The input tensors. They must match by index of the corresponding InputFeed
+  /// value. We set up the tensors once and just mutate their scalars before
+  /// each evaluation. The input tensors keep their value after an evaluation.
+  std::vector<TF_Tensor *> Input;
+
+  /// The specification of the output nodes. When evaluating, the tensors in the
+  /// output tensor vector must match by index the corresponding element in the
+  /// OutputFeed.
+  std::vector<TF_Output> OutputFeed;
+
+  /// Reusable utility for deleting the session.
+  void deleteSession();
+
+  /// Reusable utility for ensuring we can bind the requested Name to a node in
+  /// the SavedModel Graph.
+  bool checkReportAndReset(const TF_Output &Output, StringRef Name);
+};
+} // namespace llvm
+
+#endif // LLVM_ANALYSIS_UTILS_TFUTILS_H
\ No newline at end of file
diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt
index a317579ecc836..703623396d96a 100644
--- a/llvm/lib/Analysis/CMakeLists.txt
+++ b/llvm/lib/Analysis/CMakeLists.txt
@@ -1,17 +1,35 @@
 set(CommonMLSources MLInlineAdvisor.cpp)
 set(ReleaseModeMLSources ReleaseModeModelRunner.cpp)
+set(DevelopmentModeMLSources TFUtils.cpp)
 
-if (DEFINED LLVM_HAVE_TF_AOT)
-  include(TensorFlowCompile)
-  tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel)
-  list(APPEND ReleaseModeMLSources
-    $
-    ${GENERATED_OBJS}
-  )
-  set(MLPolicySources ${CommonMLSources} ${ReleaseModeMLSources})
+if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API)
+  set(MLPolicySources ${CommonMLSources})
+  if (DEFINED LLVM_HAVE_TF_AOT)
+    include(TensorFlowCompile)
+    tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel)
+    list(APPEND ReleaseModeMLSources
+      $<TARGET_OBJECTS:tf_xla_runtime_objects>
+      ${GENERATED_OBJS}
+    )
+    LIST(APPEND MLPolicySources ${ReleaseModeMLSources})
+  else()
+    LIST(APPEND LLVM_OPTIONAL_SOURCES ${ReleaseModeMLSources})
+  endif()
+
+  if (DEFINED LLVM_HAVE_TF_API)
+    LIST(APPEND MLPolicySources ${DevelopmentModeMLSources})
+    LIST(APPEND MLLinkDeps ${tensorflow_c_api})
+  else()
+    LIST(APPEND LLVM_OPTIONAL_SOURCES ${DevelopmentModeMLSources})
+  endif()
 else()
-  set(LLVM_OPTIONAL_SOURCES ${CommonMLSources} ${ReleaseModeMLSources})
+  LIST(APPEND LLVM_OPTIONAL_SOURCES 
+    ${CommonMLSources}
+    ${DevelopmentModeMLSources}
+    ${ReleaseModeMLSources}
+    )
 endif()
+  
 
 add_llvm_component_library(LLVMAnalysis
   AliasAnalysis.cpp
@@ -57,6 +75,7 @@ add_llvm_component_library(LLVMAnalysis
   InlineCost.cpp
   InlineAdvisor.cpp
   InlineFeaturesAnalysis.cpp
+  InlineSizeEstimatorAnalysis.cpp
   InstCount.cpp
   InstructionPrecedenceTracking.cpp
   InstructionSimplify.cpp
@@ -124,4 +143,7 @@ add_llvm_component_library(LLVMAnalysis
 
   DEPENDS
   intrinsics_gen
+
+  LINK_LIBS
+  ${MLLinkDeps}
   )
diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
new file mode 100644
index 0000000000000..1d1952ae6cbbe
--- /dev/null
+++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp
@@ -0,0 +1,299 @@
+//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This implements feature and label extraction for offline supervised learning
+// of a IR to native size model.
+//
+//===----------------------------------------------------------------------===//
+#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
+
+#ifdef LLVM_HAVE_TF_API
+#include "llvm/Analysis/Utils/TFUtils.h"
+#endif
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/IR/BasicBlock.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Function.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/PassManager.h"
+#include "llvm/MC/MCAsmLayout.h"
+#include "llvm/Support/Casting.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include <algorithm>
+#include <deque>
+
+using namespace llvm;
+
+AnalysisKey InlineSizeEstimatorAnalysis::Key;
+
+#define DEBUG_TYPE "inline-size-estimator"
+
+#ifdef LLVM_HAVE_TF_API
+cl::opt<std::string> TFIR2NativeModelPath(
+    "ml-inliner-ir2native-model", cl::Hidden,
+    cl::desc("Path to saved model evaluating native size from IR."));
+
+namespace {
+unsigned getMaxInstructionID() {
+#define LAST_OTHER_INST(NR) return NR;
+#include "llvm/IR/Instruction.def"
+}
+
+class IRToNativeSizeLearning {
+public:
+  enum class NamedFeatureIndex : size_t {
+    InitialSize,
+    Blocks,
+    Calls,
+    IsLocal,
+    IsLinkOnceODR,
+    IsLinkOnce,
+    Loops,
+    MaxLoopDepth,
+    MaxDomTreeLevel,
+
+    NumNamedFeatures
+  };
+  static const size_t NumNamedFeatures =
+      static_cast<size_t>(NamedFeatureIndex::NumNamedFeatures);
+  struct FunctionFeatures {
+    static std::vector<std::pair<size_t, size_t>>
+        ImportantInstructionSuccessions;
+    static const size_t FeatureCount;
+
+    std::array<int32_t, NumNamedFeatures> NamedFeatures = {0};
+    std::vector<int32_t> InstructionHistogram;
+    std::vector<int32_t> InstructionPairHistogram;
+
+    void fillTensor(int32_t *Ptr) const;
+    int32_t &operator[](NamedFeatureIndex Pos) {
+      return NamedFeatures[static_cast<size_t>(Pos)];
+    }
+  };
+  IRToNativeSizeLearning() = default;
+
+  static FunctionFeatures getFunctionFeatures(Function &F,
+                                              FunctionAnalysisManager &FAM);
+
+private:
+  /// Sort once the feature tuples.
+  struct SortFeatureTuples {
+    bool IsSorted = false;
+    SortFeatureTuples() {
+      std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(),
+                FunctionFeatures::ImportantInstructionSuccessions.end());
+      IsSorted = true;
+    }
+  };
+
+  static llvm::ManagedStatic<SortFeatureTuples> TupleSorter;
+
+  static bool ensureSortedTuples() { return TupleSorter->IsSorted; }
+};
+llvm::ManagedStatic<IRToNativeSizeLearning::SortFeatureTuples>
+    IRToNativeSizeLearning::TupleSorter;
+
+// This is a point in time - we determined including these pairs of
+// consecutive instructions (in the IR layout available at inline time) as
+// features improves the model performance. We want to move away from manual
+// feature selection.
+// The vector is given in opcode pairs rather than labels because 1) labels
+// weren't readily available, and 2) the successions were hand - extracted
+std::vector<std::pair<size_t, size_t>>
+    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions =
+        {{1, 34},  {15, 27}, {53, 53}, {53, 34}, {1, 11},  {32, 2},  {2, 48},
+         {28, 48}, {1, 45},  {49, 32}, {57, 56}, {55, 53}, {1, 28},  {57, 34},
+         {1, 1},   {32, 28}, {32, 15}, {49, 28}, {53, 1},  {2, 53},  {48, 34},
+         {28, 53}, {2, 32},  {1, 40},  {32, 48}, {29, 56}, {56, 32}, {55, 56},
+         {48, 56}, {1, 31},  {33, 34}, {2, 28},  {1, 12},  {55, 1},  {31, 31},
+         {65, 1},  {33, 56}, {32, 32}, {13, 13}, {1, 26},  {13, 26}, {2, 1},
+         {1, 33},  {47, 49}, {64, 1},  {2, 38},  {34, 53}, {48, 2},  {55, 34},
+         {34, 32}, {1, 5},   {56, 13}, {2, 2},   {2, 49},  {33, 2},  {49, 39},
+         {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29},
+         {47, 15}, {13, 34}, {2, 33},  {32, 49}, {49, 34}, {56, 33}, {1, 30},
+         {33, 33}, {31, 33}, {2, 29},  {56, 7},  {32, 13}, {2, 55},  {56, 56},
+         {2, 34},  {1, 42},  {34, 49}, {1, 20},  {32, 33}, {1, 25},  {53, 28},
+         {1, 14},  {31, 49}, {28, 2},  {2, 13},  {2, 56},  {1, 32},  {56, 53},
+         {65, 65}, {33, 53}, {64, 64}, {13, 2},  {34, 33}, {1, 4},   {49, 2},
+         {1, 9},   {56, 1},  {33, 1},  {53, 57}, {32, 53}, {13, 56}, {32, 56},
+         {55, 55}, {1, 18},  {49, 56}, {34, 34}, {1, 7},   {56, 64}, {32, 1},
+         {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32},
+         {32, 40}, {1, 29},  {53, 2},  {34, 1},  {32, 34}, {49, 49}, {1, 24},
+         {40, 34}, {1, 13},  {38, 34}, {29, 2},  {34, 2},  {1, 39},  {1, 22},
+         {1, 27},  {49, 1},  {1, 8},   {56, 2}};
+
+// We have: 9 calculated features (the features here); 1 feature for each
+// instruction opcode; and 1 feature for each manually-identified sequence.
+// For the latter 2, we build a histogram: we count the number of
+// occurrences of each instruction opcode or succession of instructions,
+// respectively.
+// Note that instruction opcodes start from 1. For convenience, we also have an
+// always 0 feature for the '0' opcode, hence the extra 1.
+const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount =
+    IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions
+        .size() +
+    getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures;
+
+size_t getSize(Function &F, TargetTransformInfo &TTI) {
+  size_t Ret = 0;
+  for (auto &BB : F)
+    for (auto &I : BB)
+      Ret += TTI.getInstructionCost(
+          &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize);
+  return Ret;
+}
+
+size_t getSize(Function &F, FunctionAnalysisManager &FAM) {
+  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
+  return getSize(F, TTI);
+}
+
+unsigned getMaxDominatorTreeDepth(const Function &F,
+                                  const DominatorTree &Tree) {
+  unsigned Ret = 0;
+  for (auto &BB : F)
+    if (auto *TN = Tree.getNode(&BB))
+      Ret = std::max(Ret, TN->getLevel());
+  return Ret;
+}
+} // namespace
+
+IRToNativeSizeLearning::FunctionFeatures
+IRToNativeSizeLearning::getFunctionFeatures(Function &F,
+                                            FunctionAnalysisManager &FAM) {
+  assert(ensureSortedTuples() && "expected lazy initialization");
+
+  auto &DomTree = FAM.getResult<DominatorTreeAnalysis>(F);
+  FunctionFeatures FF;
+  size_t InstrCount = getMaxInstructionID() + 1;
+  FF.InstructionHistogram.resize(InstrCount);
+
+  FF.InstructionPairHistogram.resize(
+      FunctionFeatures::ImportantInstructionSuccessions.size());
+
+  auto StartID = 0;
+  auto LastID = StartID;
+  auto getPairIndex = [](size_t a, size_t b) {
+    auto I =
+        std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(),
+                  FunctionFeatures::ImportantInstructionSuccessions.end(),
+                  std::make_pair(a, b));
+    if (I == FunctionFeatures::ImportantInstructionSuccessions.end())
+      return -1;
+    return static_cast(std::distance(
+        FunctionFeatures::ImportantInstructionSuccessions.begin(), I));
+  };
+
+  // We don't want debug calls, because they'd just add noise.
+  for (auto &BB : F) {
+    for (auto I = BB.instructionsWithoutDebug().begin(),
+              E = BB.instructionsWithoutDebug().end();
+         I != E; ++I) {
+      auto ID = I->getOpcode();
+
+      ++FF.InstructionHistogram[ID];
+      int PairIndex = getPairIndex(LastID, ID);
+      if (PairIndex >= 0)
+        ++FF.InstructionPairHistogram[PairIndex];
+      LastID = ID;
+      if (isa<CallBase>(*I))
+        ++FF[NamedFeatureIndex::Calls];
+    }
+  }
+
+  FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM);
+  FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage();
+  FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage();
+  FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage();
+  FF[NamedFeatureIndex::Blocks] =
+      std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end());
+  auto &LI = FAM.getResult<LoopAnalysis>(F);
+  FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end());
+  for (auto &L : LI)
+    FF[NamedFeatureIndex::MaxLoopDepth] =
+        std::max(FF[NamedFeatureIndex::MaxLoopDepth],
+                 static_cast<int32_t>(L->getLoopDepth()));
+  FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree);
+  return FF;
+}
+
+void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const {
+  std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr);
+  Ptr += NamedFeatures.size();
+  std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr);
+  Ptr += InstructionHistogram.size();
+  std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(),
+            Ptr);
+}
+
+bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() {
+  return !TFIR2NativeModelPath.empty();
+}
+
+InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {
+  if (!isEvaluatorRequested()) {
+    return;
+  }
+  std::vector<std::string> InputNames{"serving_default_input_1"};
+  std::vector<std::string> OutputName{"StatefulPartitionedCall"};
+  Evaluator = std::make_unique<TFModelEvaluator>(
+      TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName);
+  if (!Evaluator || !Evaluator->isValid()) {
+    Evaluator.reset();
+    return;
+  }
+  static const std::vector<int64_t> Dim{
+      1, static_cast<int64_t>(
+             IRToNativeSizeLearning::FunctionFeatures::FeatureCount)};
+
+  Evaluator->initInput(0, TF_INT32, Dim);
+}
+
+InlineSizeEstimatorAnalysis::Result
+InlineSizeEstimatorAnalysis::run(const Function &F,
+                                 FunctionAnalysisManager &FAM) {
+  if (!Evaluator)
+    return None;
+  auto Features = IRToNativeSizeLearning::getFunctionFeatures(
+      const_cast(F), FAM);
+  int32_t *V = static_cast<int32_t *>(TF_TensorData(Evaluator->getInput()[0]));
+  Features.fillTensor(V);
+  auto ER = Evaluator->evaluate();
+  if (!ER)
+    return None;
+  float Ret = *ER->getTensorValue<float>(0);
+  if (Ret < 0.0)
+    Ret = 0.0;
+  return static_cast<size_t>(Ret);
+}
+
+InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
+InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis(
+    InlineSizeEstimatorAnalysis &&Other)
+    : Evaluator(std::move(Other.Evaluator)) {}
+
+#else
+namespace llvm {
+class TFModelEvaluator {};
+} // namespace llvm
+InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {}
+InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis(
+    InlineSizeEstimatorAnalysis &&) {}
+InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {}
+InlineSizeEstimatorAnalysis::Result
+InlineSizeEstimatorAnalysis::run(const Function &F,
+                                 FunctionAnalysisManager &FAM) {
+  return None;
+}
+bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; }
+#endif
\ No newline at end of file
diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp
new file mode 100644
index 0000000000000..6cd5b5c9b4eae
--- /dev/null
+++ b/llvm/lib/Analysis/TFUtils.cpp
@@ -0,0 +1,143 @@
+//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements utilities for interfacing with tensorflow C APIs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/Utils/TFUtils.h"
+#include "llvm/ADT/Twine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/raw_ostream.h"
+
+#include "tensorflow/c/c_api_experimental.h"
+
+#include <cassert>
+
+using namespace llvm;
+
+namespace {
+
+struct TFInitializer {
+  TFInitializer() {
+    assert(!IsInitialized && "TFInitialized should be called only once");
+    int Argc = 1;
+    const char *Name = "";
+    const char **NamePtr = &Name;
+    TF_InitMain(Name, &Argc, const_cast<char ***>(&NamePtr));
+    IsInitialized = true;
+  }
+  bool IsInitialized = false;
+};
+
+llvm::ManagedStatic<TFInitializer> TFLibInitializer;
+
+bool ensureInitTF() { return TFLibInitializer->IsInitialized; }
+
+TFModelEvaluator::TFGraphPtr createTFGraph() {
+  return TFModelEvaluator::TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph);
+}
+
+TFModelEvaluator::TFStatusPtr createTFStatus() {
+  return TFModelEvaluator::TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus);
+}
+
+TFModelEvaluator::TFSessionOptionsPtr createTFSessionOptions() {
+  return TFModelEvaluator::TFSessionOptionsPtr(TF_NewSessionOptions(),
+                                               &TF_DeleteSessionOptions);
+}
+} // namespace
+
+TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath,
+                                   const std::vector<std::string> &InputNames,
+                                   const std::vector<std::string> &OutputNames,
+                                   const char *Tags)
+    : Graph(createTFGraph()), Options(createTFSessionOptions()),
+      InputFeed(InputNames.size()), Input(InputNames.size()),
+      OutputFeed(OutputNames.size()) {
+  if (!ensureInitTF()) {
+    errs() << "Tensorflow should have been initialized";
+    return;
+  }
+  auto Status = createTFStatus();
+
+  Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr,
+                                         SavedModelPath.str().c_str(), &Tags, 1,
+                                         Graph.get(), nullptr, Status.get());
+  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
+    errs() << TF_Message(Status.get());
+    deleteSession();
+  }
+  for (size_t I = 0; I < InputNames.size(); ++I) {
+    InputFeed[I] = {
+        TF_GraphOperationByName(Graph.get(), (InputNames[I]).c_str()), 0};
+    if (!checkReportAndReset(InputFeed[I], InputNames[I]))
+      return;
+  }
+  for (size_t I = 0; I < OutputNames.size(); ++I) {
+    OutputFeed[I] = {
+        TF_GraphOperationByName(Graph.get(), (OutputNames[I]).c_str()), 0};
+    if (!checkReportAndReset(OutputFeed[I], OutputNames[I]))
+      return;
+  }
+}
+
+TFModelEvaluator::~TFModelEvaluator() {
+  for (auto *T : Input) {
+    TF_DeleteTensor(T);
+  }
+  deleteSession();
+}
+
+bool TFModelEvaluator::checkReportAndReset(const TF_Output &Output,
+                                           StringRef Name) {
+  if (Output.oper)
+    return true;
+  errs() << "Could not find TF_Output named: " + Name;
+  deleteSession();
+  return false;
+}
+
+void TFModelEvaluator::deleteSession() {
+  if (Session == nullptr)
+    return;
+  auto Status = createTFStatus();
+  TF_DeleteSession(Session, Status.get());
+  Session = nullptr;
+  if (TF_GetCode(Status.get()) != TF_Code::TF_OK)
+    errs() << "Could not delete TF session";
+}
+
+Optional<TFModelEvaluator::EvaluationResult> TFModelEvaluator::evaluate() {
+  if (!isValid())
+    return None;
+  EvaluationResult Ret(OutputFeed.size());
+  auto Status = createTFStatus();
+  TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(), Input.size(),
+                OutputFeed.data(), Ret.Output.data(), Ret.Output.size(),
+                nullptr, 0, nullptr, Status.get());
+  if (TF_GetCode(Status.get()) != TF_Code::TF_OK) {
+    errs() << TF_Message(Status.get());
+    deleteSession();
+    return None;
+  }
+  return Ret;
+}
+
+void TFModelEvaluator::initInput(int Index, TF_DataType Type,
+                                 const std::vector<int64_t> &Dimensions) {
+  int64_t TotalSize = TF_DataTypeSize(Type);
+  for (auto &D : Dimensions)
+    TotalSize *= D;
+
+  Input[Index] =
+      TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize);
+  std::memset(TF_TensorData(Input[Index]), 0, TotalSize);
+}
\ No newline at end of file
diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp
index 771cdfd17aa54..7f57634676956 100644
--- a/llvm/lib/Passes/PassBuilder.cpp
+++ b/llvm/lib/Passes/PassBuilder.cpp
@@ -35,6 +35,7 @@
 #include "llvm/Analysis/IVUsers.h"
 #include "llvm/Analysis/InlineAdvisor.h"
 #include "llvm/Analysis/InlineFeaturesAnalysis.h"
+#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/LazyValueInfo.h"
 #include "llvm/Analysis/LoopAccessAnalysis.h"
diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def
index eb2b740db5612..dfdfc3d05976a 100644
--- a/llvm/lib/Passes/PassRegistry.def
+++ b/llvm/lib/Passes/PassRegistry.def
@@ -133,6 +133,7 @@ FUNCTION_ANALYSIS("loops", LoopAnalysis())
 FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis())
 FUNCTION_ANALYSIS("da", DependenceAnalysis())
 FUNCTION_ANALYSIS("inliner-features", InlineFeaturesAnalysis())
+FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis())
 FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis())
 FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis())
 FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis())
diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt
index 42f7dd3c06101..59ad444d32fb4 100644
--- a/llvm/unittests/Analysis/CMakeLists.txt
+++ b/llvm/unittests/Analysis/CMakeLists.txt
@@ -6,7 +6,13 @@ set(LLVM_LINK_COMPONENTS
   TransformUtils
   )
 
-add_llvm_unittest(AnalysisTests
+if (DEFINED LLVM_HAVE_TF_API)
+  LIST(APPEND EXTRA_TESTS TFUtilsTest.cpp)
+else()
+  LIST(APPEND LLVM_OPTIONAL_SOURCES TFUtilsTest.cpp)
+endif()
+
+add_llvm_unittest_with_input_files(AnalysisTests
   AliasAnalysisTest.cpp
   AliasSetTrackerTest.cpp
   AssumeBundleQueriesTest.cpp
@@ -22,6 +28,7 @@ add_llvm_unittest(AnalysisTests
   DomTreeUpdaterTest.cpp
   GlobalsModRefTest.cpp
   InlineFeaturesAnalysisTest.cpp
+  InlineSizeEstimatorAnalysisTest.cpp
   IVDescriptorsTest.cpp
   LazyCallGraphTest.cpp
   LoadsTest.cpp
@@ -40,4 +47,7 @@ add_llvm_unittest(AnalysisTests
   ValueLatticeTest.cpp
   ValueTrackingTest.cpp
   VectorUtilsTest.cpp
+  ${EXTRA_TESTS}
   )
+
+ target_link_libraries(AnalysisTests PRIVATE LLVMTestingSupport)
diff --git a/llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp b/llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp
new file mode 100644
index 0000000000000..377590be016ac
--- /dev/null
+++ b/llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp
@@ -0,0 +1,101 @@
+//===- InlineSizeEstimatorAnalysisTest.cpp - test for ir2native -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/AsmParser/Parser.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/IR/Instructions.h"
+#include "llvm/IR/LLVMContext.h"
+#include "llvm/IR/Module.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
+#include "llvm/Testing/Support/SupportHelpers.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+extern const char *TestMainArgv0;
+extern cl::opt<std::string> TFIR2NativeModelPath;
+
+#if LLVM_HAVE_TF_API
+static std::string getModelPath() {
+  SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0);
+  llvm::sys::path::append(InputsDir, "ir2native_x86_64_model");
+  return std::string(InputsDir);
+}
+#endif
+
+static std::unique_ptr<Module> parseIR(LLVMContext &C, const char *IR) {
+  SMDiagnostic Err;
+  std::unique_ptr<Module> Mod = parseAssemblyString(IR, Err, C);
+  if (!Mod)
+    Err.print("MLAnalysisTests", errs());
+  return Mod;
+}
+
+static FunctionAnalysisManager buildFAM() {
+  FunctionAnalysisManager FAM;
+  FAM.registerPass([&] { return DominatorTreeAnalysis(); });
+  FAM.registerPass([&] { return PassInstrumentationAnalysis(); });
+  FAM.registerPass([&] { return TargetIRAnalysis(); });
+  FAM.registerPass([&] { return LoopAnalysis(); });
+  return FAM;
+}
+
+// Test model loading and evaluation.
+TEST(InlineSizeEstimatorAnalysis, SizeIsValidTest) {
+  LLVMContext C;
+  std::unique_ptr<Module> M = parseIR(C,
+                                      R"IR(
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux-gnu"
+
+declare i32 @f1(i32)
+declare i32 @f2(i32)
+
+define i32 @branches(i32) {
+  %cond = icmp slt i32 %0, 3
+  br i1 %cond, label %then, label %else
+
+then:
+  %ret.1 = call i32 @f1(i32 %0)
+  br label %last.block
+
+else:
+  %ret.2 = call i32 @f2(i32 %0)
+  br label %last.block
+
+last.block:
+  %ret = phi i32 [%ret.1, %then], [%ret.2, %else]
+  ret i32 %ret
+}
+
+define internal i32 @top() {
+  %1 = call i32 @branches(i32 2)
+  %2 = call i32 @f1(i32 %1)
+  ret i32 %2
+}
+)IR");
+
+  FunctionAnalysisManager FAM = buildFAM();
+#if LLVM_HAVE_TF_API
+  TFIR2NativeModelPath = getModelPath();
+#endif
+
+  InlineSizeEstimatorAnalysis FA;
+  auto SizeEstimate = FA.run(*M->getFunction("branches"), FAM);
+#if LLVM_HAVE_TF_API
+  EXPECT_GT(*SizeEstimate, 0);
+#else
+  EXPECT_FALSE(SizeEstimate.hasValue());
+#endif
+}
diff --git a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt
new file mode 100644
index 0000000000000..6efdad51083d3
--- /dev/null
+++ b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt
@@ -0,0 +1,10596 @@
+saved_model_schema_version: 1
+meta_graphs {
+  meta_info_def {
+    stripped_op_list {
+      op {
+        name: "Const"
+        output_arg {
+          name: "output"
+          type_attr: "dtype"
+        }
+        attr {
+          name: "value"
+          type: "tensor"
+        }
+        attr {
+          name: "dtype"
+          type: "type"
+        }
+      }
+      op {
+        name: "NoOp"
+      }
+      op {
+        name: "Placeholder"
+        output_arg {
+          name: "output"
+          type_attr: "dtype"
+        }
+        attr {
+          name: "dtype"
+          type: "type"
+        }
+        attr {
+          name: "shape"
+          type: "shape"
+          default_value {
+            shape {
+              unknown_rank: true
+            }
+          }
+        }
+      }
+      op {
+        name: "ReadVariableOp"
+        input_arg {
+          name: "resource"
+          type: DT_RESOURCE
+        }
+        output_arg {
+          name: "value"
+          type_attr: "dtype"
+        }
+        attr {
+          name: "dtype"
+          type: "type"
+        }
+        is_stateful: true
+      }
+      op {
+        name: "StatefulPartitionedCall"
+        input_arg {
+          name: "args"
+          type_list_attr: "Tin"
+        }
+        output_arg {
+          name: "output"
+          type_list_attr: "Tout"
+        }
+        attr {
+          name: "Tin"
+          type: "list(type)"
+          has_minimum: true
+        }
+        attr {
+          name: "Tout"
+          type: "list(type)"
+          has_minimum: true
+        }
+        attr {
+          name: "f"
+          type: "func"
+        }
+        attr {
+          name: "config"
+          type: "string"
+          default_value {
+            s: ""
+          }
+        }
+        attr {
+          name: "config_proto"
+          type: "string"
+          default_value {
+            s: ""
+          }
+        }
+        attr {
+          name: "executor_type"
+          type: "string"
+          default_value {
+            s: ""
+          }
+        }
+        is_stateful: true
+      }
+      op {
+        name: "VarHandleOp"
+        output_arg {
+          name: "resource"
+          type: DT_RESOURCE
+        }
+        attr {
+          name: "container"
+          type: "string"
+          default_value {
+            s: ""
+          }
+        }
+        attr {
+          name: "shared_name"
+          type: "string"
+          default_value {
+            s: ""
+          }
+        }
+        attr {
+          name: "dtype"
+          type: "type"
+        }
+        attr {
+          name: "shape"
+          type: "shape"
+        }
+        is_stateful: true
+      }
+    }
+    tags: "serve"
+    tensorflow_version: "1.15.0"
+    tensorflow_git_version: "unknown"
+    stripped_default_attrs: true
+  }
+  graph_def {
+    node {
+      name: "dense/kernel"
+      op: "VarHandleOp"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+            dim {
+              size: 214
+            }
+            dim {
+              size: 100
+            }
+          }
+        }
+      }
+      attr {
+        key: "shared_name"
+        value {
+          s: "dense/kernel"
+        }
+      }
+    }
+    node {
+      name: "dense/kernel/Read/ReadVariableOp"
+      op: "ReadVariableOp"
+      input: "dense/kernel"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+              dim {
+                size: 214
+              }
+              dim {
+                size: 100
+              }
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+    }
+    node {
+      name: "dense/bias"
+      op: "VarHandleOp"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+            dim {
+              size: 100
+            }
+          }
+        }
+      }
+      attr {
+        key: "shared_name"
+        value {
+          s: "dense/bias"
+        }
+      }
+    }
+    node {
+      name: "dense/bias/Read/ReadVariableOp"
+      op: "ReadVariableOp"
+      input: "dense/bias"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+              dim {
+                size: 100
+              }
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+    }
+    node {
+      name: "dense_1/kernel"
+      op: "VarHandleOp"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+            dim {
+              size: 100
+            }
+            dim {
+              size: 1
+            }
+          }
+        }
+      }
+      attr {
+        key: "shared_name"
+        value {
+          s: "dense_1/kernel"
+        }
+      }
+    }
+    node {
+      name: "dense_1/kernel/Read/ReadVariableOp"
+      op: "ReadVariableOp"
+      input: "dense_1/kernel"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+              dim {
+                size: 100
+              }
+              dim {
+                size: 1
+              }
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+    }
+    node {
+      name: "dense_1/bias"
+      op: "VarHandleOp"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+            dim {
+              size: 1
+            }
+          }
+        }
+      }
+      attr {
+        key: "shared_name"
+        value {
+          s: "dense_1/bias"
+        }
+      }
+    }
+    node {
+      name: "dense_1/bias/Read/ReadVariableOp"
+      op: "ReadVariableOp"
+      input: "dense_1/bias"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+              dim {
+                size: 1
+              }
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+    }
+    node {
+      name: "total"
+      op: "VarHandleOp"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+          }
+        }
+      }
+      attr {
+        key: "shared_name"
+        value {
+          s: "total"
+        }
+      }
+    }
+    node {
+      name: "total/Read/ReadVariableOp"
+      op: "ReadVariableOp"
+      input: "total"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+    }
+    node {
+      name: "count"
+      op: "VarHandleOp"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+          }
+        }
+      }
+      attr {
+        key: "shared_name"
+        value {
+          s: "count"
+        }
+      }
+    }
+    node {
+      name: "count/Read/ReadVariableOp"
+      op: "ReadVariableOp"
+      input: "count"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+    }
+    node {
+      name: "total_1"
+      op: "VarHandleOp"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+          }
+        }
+      }
+      attr {
+        key: "shared_name"
+        value {
+          s: "total_1"
+        }
+      }
+    }
+    node {
+      name: "total_1/Read/ReadVariableOp"
+      op: "ReadVariableOp"
+      input: "total_1"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+    }
+    node {
+      name: "count_1"
+      op: "VarHandleOp"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+          }
+        }
+      }
+      attr {
+        key: "shared_name"
+        value {
+          s: "count_1"
+        }
+      }
+    }
+    node {
+      name: "count_1/Read/ReadVariableOp"
+      op: "ReadVariableOp"
+      input: "count_1"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_FLOAT
+        }
+      }
+    }
+    node {
+      name: "NoOp"
+      op: "NoOp"
+    }
+    node {
+      name: "Const"
+      op: "Const"
+      device: "/device:CPU:0"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_STRING
+        }
+      }
+      attr {
+        key: "value"
+        value {
+          tensor {
+            dtype: DT_STRING
+            tensor_shape {
+            }
+            string_val: "\n\277\001\n\030\010\001\022\024layer_with_weights-0\n\013\010\001\022\007layer-0\n\030\010\002\022\024layer_with_weights-1\n\013\010\002\022\007layer-1\n\r\010\003\022\toptimizer\n\031\010\004\022\025regularization_losses\n\r\010\005\022\tvariables\n\027\010\006\022\023trainable_variables\n\r\010\007\022\tkeras_api\n\016\010\010\022\nsignatures\nh\n\n\010\t\022\006kernel\n\010\010\n\022\004bias\n\031\010\013\022\025regularization_losses\n\r\010\014\022\tvariables\n\027\010\r\022\023trainable_variables\n\r\010\016\022\tkeras_api\nh\n\n\010\017\022\006kernel\n\010\010\020\022\004bias\n\031\010\021\022\025regularization_losses\n\r\010\022\022\tvariables\n\027\010\023\022\023trainable_variables\n\r\010\024\022\tkeras_api\n\000\n\000\n\034\n\005\010\t\022\0010\n\005\010\n\022\0011\n\005\010\017\022\0012\n\005\010\020\022\0013\n\034\n\005\010\t\022\0010\n\005\010\n\022\0011\n\005\010\017\022\0012\n\005\010\020\022\0013\n\255\001\n\n\010\025\022\006layers\n\037\010\026\022\033layer_regularization_losses\n\033\010\027\022\027non_trainable_variables\n\021\010\030\022\rlayer_metrics\n\031\010\004\022\025regularization_losses\n\013\010\031\022\007metrics\n\r\010\005\022\tvariables\n\027\010\006\022\023trainable_variables\n\000\nX\022V\n\016VARIABLE_VALUE\022\014dense/kernel\0326layer_with_weights-0/kernel/.ATTRIBUTES/VARIABLE_VALUE\nT\022R\n\016VARIABLE_VALUE\022\ndense/bias\0324layer_with_weights-0/bias/.ATTRIBUTES/VARIABLE_VALUE\n\000\n\016\n\005\010\t\022\0010\n\005\010\n\022\0011\n\016\n\005\010\t\022\0010\n\005\010\n\022\0011\n\255\001\n\n\010\032\022\006layers\n\037\010\033\022\033layer_regularization_losses\n\033\010\034\022\027non_trainable_variables\n\021\010\035\022\rlayer_metrics\n\031\010\013\022\025regularization_losses\n\013\010\036\022\007metrics\n\r\010\014\022\tvariables\n\027\010\r\022\023trainable_variables\nZ\022X\n\016VARIABLE_VALUE\022\016dense_1/kernel\0326layer_with_weights-1/kernel/.ATTRIBUTES/VARIABLE_VALUE\nV\022T\n\016VARI
ABLE_VALUE\022\014dense_1/bias\0324layer_with_weights-1/bias/.ATTRIBUTES/VARIABLE_VALUE\n\000\n\016\n\005\010\017\022\0010\n\005\010\020\022\0011\n\016\n\005\010\017\022\0010\n\005\010\020\022\0011\n\255\001\n\n\010\037\022\006layers\n\037\010 \022\033layer_regularization_losses\n\033\010!\022\027non_trainable_variables\n\021\010\"\022\rlayer_metrics\n\031\010\021\022\025regularization_losses\n\013\010#\022\007metrics\n\r\010\022\022\tvariables\n\027\010\023\022\023trainable_variables\n\016\n\005\010\001\022\0010\n\005\010\002\022\0011\n\000\n\000\n\000\n\016\n\005\010$\022\0010\n\005\010%\022\0011\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n4\n\t\010&\022\005total\n\t\010\'\022\005count\n\r\010(\022\tvariables\n\r\010)\022\tkeras_api\nD\n\t\010*\022\005total\n\t\010+\022\005count\n\016\010,\022\n_fn_kwargs\n\r\010-\022\tvariables\n\r\010.\022\tkeras_api\nO\022M\n\016VARIABLE_VALUE\022\005total\0324keras_api/metrics/0/total/.ATTRIBUTES/VARIABLE_VALUE\nO\022M\n\016VARIABLE_VALUE\022\005count\0324keras_api/metrics/0/count/.ATTRIBUTES/VARIABLE_VALUE\n\016\n\005\010&\022\0010\n\005\010\'\022\0011\n\017\n\r\010(\022\tvariables\nQ\022O\n\016VARIABLE_VALUE\022\007total_1\0324keras_api/metrics/1/total/.ATTRIBUTES/VARIABLE_VALUE\nQ\022O\n\016VARIABLE_VALUE\022\007count_1\0324keras_api/metrics/1/count/.ATTRIBUTES/VARIABLE_VALUE\n\000\n\016\n\005\010*\022\0010\n\005\010+\022\0011\n\017\n\r\010-\022\tvariables"
+          }
+        }
+      }
+    }
+    node {
+      name: "serving_default_input_1"
+      op: "Placeholder"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+              dim {
+                size: -1
+              }
+              dim {
+                size: 214
+              }
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_INT32
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+            dim {
+              size: -1
+            }
+            dim {
+              size: 214
+            }
+          }
+        }
+      }
+    }
+    node {
+      name: "StatefulPartitionedCall"
+      op: "StatefulPartitionedCall"
+      input: "serving_default_input_1"
+      input: "dense/kernel"
+      input: "dense/bias"
+      input: "dense_1/kernel"
+      input: "dense_1/bias"
+      attr {
+        key: "Tin"
+        value {
+          list {
+            type: DT_INT32
+            type: DT_RESOURCE
+            type: DT_RESOURCE
+            type: DT_RESOURCE
+            type: DT_RESOURCE
+          }
+        }
+      }
+      attr {
+        key: "Tout"
+        value {
+          list {
+            type: DT_FLOAT
+          }
+        }
+      }
+      attr {
+        key: "_collective_manager_ids"
+        value {
+          list {
+          }
+        }
+      }
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+              dim {
+                size: -1
+              }
+              dim {
+                size: 1
+              }
+            }
+          }
+        }
+      }
+      attr {
+        key: "_read_only_resource_inputs"
+        value {
+          list {
+            i: 1
+            i: 2
+            i: 3
+            i: 4
+          }
+        }
+      }
+      attr {
+        key: "config_proto"
+        value {
+          s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+        }
+      }
+      attr {
+        key: "f"
+        value {
+          func {
+            name: "__inference_signature_wrapper_6671"
+          }
+        }
+      }
+    }
+    node {
+      name: "saver_filename"
+      op: "Placeholder"
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "dtype"
+        value {
+          type: DT_STRING
+        }
+      }
+      attr {
+        key: "shape"
+        value {
+          shape {
+          }
+        }
+      }
+    }
+    node {
+      name: "StatefulPartitionedCall_1"
+      op: "StatefulPartitionedCall"
+      input: "saver_filename"
+      input: "dense/kernel/Read/ReadVariableOp"
+      input: "dense/bias/Read/ReadVariableOp"
+      input: "dense_1/kernel/Read/ReadVariableOp"
+      input: "dense_1/bias/Read/ReadVariableOp"
+      input: "total/Read/ReadVariableOp"
+      input: "count/Read/ReadVariableOp"
+      input: "total_1/Read/ReadVariableOp"
+      input: "count_1/Read/ReadVariableOp"
+      input: "Const"
+      attr {
+        key: "Tin"
+        value {
+          list {
+            type: DT_STRING
+            type: DT_FLOAT
+            type: DT_FLOAT
+            type: DT_FLOAT
+            type: DT_FLOAT
+            type: DT_FLOAT
+            type: DT_FLOAT
+            type: DT_FLOAT
+            type: DT_FLOAT
+            type: DT_STRING
+          }
+        }
+      }
+      attr {
+        key: "Tout"
+        value {
+          list {
+            type: DT_STRING
+          }
+        }
+      }
+      attr {
+        key: "_collective_manager_ids"
+        value {
+          list {
+          }
+        }
+      }
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "_read_only_resource_inputs"
+        value {
+          list {
+          }
+        }
+      }
+      attr {
+        key: "config_proto"
+        value {
+          s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+        }
+      }
+      attr {
+        key: "f"
+        value {
+          func {
+            name: "__inference__traced_save_6824"
+          }
+        }
+      }
+    }
+    node {
+      name: "StatefulPartitionedCall_2"
+      op: "StatefulPartitionedCall"
+      input: "saver_filename"
+      input: "dense/kernel"
+      input: "dense/bias"
+      input: "dense_1/kernel"
+      input: "dense_1/bias"
+      input: "total"
+      input: "count"
+      input: "total_1"
+      input: "count_1"
+      attr {
+        key: "Tin"
+        value {
+          list {
+            type: DT_STRING
+            type: DT_RESOURCE
+            type: DT_RESOURCE
+            type: DT_RESOURCE
+            type: DT_RESOURCE
+            type: DT_RESOURCE
+            type: DT_RESOURCE
+            type: DT_RESOURCE
+            type: DT_RESOURCE
+          }
+        }
+      }
+      attr {
+        key: "Tout"
+        value {
+          list {
+            type: DT_STRING
+          }
+        }
+      }
+      attr {
+        key: "_collective_manager_ids"
+        value {
+          list {
+          }
+        }
+      }
+      attr {
+        key: "_output_shapes"
+        value {
+          list {
+            shape {
+            }
+          }
+        }
+      }
+      attr {
+        key: "_read_only_resource_inputs"
+        value {
+          list {
+          }
+        }
+      }
+      attr {
+        key: "config_proto"
+        value {
+          s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+        }
+      }
+      attr {
+        key: "f"
+        value {
+          func {
+            name: "__inference__traced_restore_6860"
+          }
+        }
+      }
+    }
+    library {
+      function {
+        signature {
+          name: "__inference__traced_restore_6860"
+          input_arg {
+            name: "file_prefix"
+            type: DT_STRING
+          }
+          input_arg {
+            name: "assignvariableop_dense_kernel"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "assignvariableop_1_dense_bias"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "assignvariableop_2_dense_1_kernel"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "assignvariableop_3_dense_1_bias"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "assignvariableop_4_total"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "assignvariableop_5_count"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "assignvariableop_6_total_1"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "assignvariableop_7_count_1"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity_9"
+            type: DT_STRING
+          }
+          is_stateful: true
+          control_output: "AssignVariableOp"
+          control_output: "AssignVariableOp_1"
+          control_output: "AssignVariableOp_2"
+          control_output: "AssignVariableOp_3"
+          control_output: "AssignVariableOp_4"
+          control_output: "AssignVariableOp_5"
+          control_output: "AssignVariableOp_6"
+          control_output: "AssignVariableOp_7"
+          control_output: "RestoreV2"
+          control_output: "RestoreV2_1"
+        }
+        node_def {
+          name: "RestoreV2/tensor_names"
+          op: "Const"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 8
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_STRING
+                tensor_shape {
+                  dim {
+                    size: 8
+                  }
+                }
+                string_val: "layer_with_weights-0/kernel/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "layer_with_weights-0/bias/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "layer_with_weights-1/kernel/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "layer_with_weights-1/bias/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "keras_api/metrics/0/total/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "keras_api/metrics/0/count/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "keras_api/metrics/1/total/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "keras_api/metrics/1/count/.ATTRIBUTES/VARIABLE_VALUE"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "RestoreV2/tensor_names"
+          }
+        }
+        node_def {
+          name: "RestoreV2/shape_and_slices"
+          op: "Const"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 8
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_STRING
+                tensor_shape {
+                  dim {
+                    size: 8
+                  }
+                }
+                string_val: ""
+                string_val: ""
+                string_val: ""
+                string_val: ""
+                string_val: ""
+                string_val: ""
+                string_val: ""
+                string_val: ""
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "RestoreV2/shape_and_slices"
+          }
+        }
+        node_def {
+          name: "RestoreV2"
+          op: "RestoreV2"
+          input: "file_prefix"
+          input: "RestoreV2/tensor_names:output:0"
+          input: "RestoreV2/shape_and_slices:output:0"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  unknown_rank: true
+                }
+                shape {
+                  unknown_rank: true
+                }
+                shape {
+                  unknown_rank: true
+                }
+                shape {
+                  unknown_rank: true
+                }
+                shape {
+                  unknown_rank: true
+                }
+                shape {
+                  unknown_rank: true
+                }
+                shape {
+                  unknown_rank: true
+                }
+                shape {
+                  unknown_rank: true
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtypes"
+            value {
+              list {
+                type: DT_FLOAT
+                type: DT_FLOAT
+                type: DT_FLOAT
+                type: DT_FLOAT
+                type: DT_FLOAT
+                type: DT_FLOAT
+                type: DT_FLOAT
+                type: DT_FLOAT
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "RestoreV2"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "RestoreV2:tensors:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  unknown_rank: true
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        node_def {
+          name: "AssignVariableOp"
+          op: "AssignVariableOp"
+          input: "assignvariableop_dense_kernel"
+          input: "Identity:output:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "AssignVariableOp"
+          }
+        }
+        node_def {
+          name: "Identity_1"
+          op: "Identity"
+          input: "RestoreV2:tensors:1"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  unknown_rank: true
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity_1"
+          }
+        }
+        node_def {
+          name: "AssignVariableOp_1"
+          op: "AssignVariableOp"
+          input: "assignvariableop_1_dense_bias"
+          input: "Identity_1:output:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "AssignVariableOp_1"
+          }
+        }
+        node_def {
+          name: "Identity_2"
+          op: "Identity"
+          input: "RestoreV2:tensors:2"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  unknown_rank: true
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity_2"
+          }
+        }
+        node_def {
+          name: "AssignVariableOp_2"
+          op: "AssignVariableOp"
+          input: "assignvariableop_2_dense_1_kernel"
+          input: "Identity_2:output:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "AssignVariableOp_2"
+          }
+        }
+        node_def {
+          name: "Identity_3"
+          op: "Identity"
+          input: "RestoreV2:tensors:3"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  unknown_rank: true
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity_3"
+          }
+        }
+        node_def {
+          name: "AssignVariableOp_3"
+          op: "AssignVariableOp"
+          input: "assignvariableop_3_dense_1_bias"
+          input: "Identity_3:output:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "AssignVariableOp_3"
+          }
+        }
+        node_def {
+          name: "Identity_4"
+          op: "Identity"
+          input: "RestoreV2:tensors:4"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  unknown_rank: true
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity_4"
+          }
+        }
+        node_def {
+          name: "AssignVariableOp_4"
+          op: "AssignVariableOp"
+          input: "assignvariableop_4_total"
+          input: "Identity_4:output:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "AssignVariableOp_4"
+          }
+        }
+        node_def {
+          name: "Identity_5"
+          op: "Identity"
+          input: "RestoreV2:tensors:5"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  unknown_rank: true
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity_5"
+          }
+        }
+        node_def {
+          name: "AssignVariableOp_5"
+          op: "AssignVariableOp"
+          input: "assignvariableop_5_count"
+          input: "Identity_5:output:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "AssignVariableOp_5"
+          }
+        }
+        node_def {
+          name: "Identity_6"
+          op: "Identity"
+          input: "RestoreV2:tensors:6"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  unknown_rank: true
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity_6"
+          }
+        }
+        node_def {
+          name: "AssignVariableOp_6"
+          op: "AssignVariableOp"
+          input: "assignvariableop_6_total_1"
+          input: "Identity_6:output:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "AssignVariableOp_6"
+          }
+        }
+        node_def {
+          name: "Identity_7"
+          op: "Identity"
+          input: "RestoreV2:tensors:7"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  unknown_rank: true
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity_7"
+          }
+        }
+        node_def {
+          name: "AssignVariableOp_7"
+          op: "AssignVariableOp"
+          input: "assignvariableop_7_count_1"
+          input: "Identity_7:output:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "AssignVariableOp_7"
+          }
+        }
+        node_def {
+          name: "RestoreV2_1/tensor_names"
+          op: "Const"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_STRING
+                tensor_shape {
+                  dim {
+                    size: 1
+                  }
+                }
+                string_val: "_CHECKPOINTABLE_OBJECT_GRAPH"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "RestoreV2_1/tensor_names"
+          }
+        }
+        node_def {
+          name: "RestoreV2_1/shape_and_slices"
+          op: "Const"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_STRING
+                tensor_shape {
+                  dim {
+                    size: 1
+                  }
+                }
+                string_val: ""
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "RestoreV2_1/shape_and_slices"
+          }
+        }
+        node_def {
+          name: "RestoreV2_1"
+          op: "RestoreV2"
+          input: "file_prefix"
+          input: "RestoreV2_1/tensor_names:output:0"
+          input: "RestoreV2_1/shape_and_slices:output:0"
+          input: "^RestoreV2"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  unknown_rank: true
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtypes"
+            value {
+              list {
+                type: DT_STRING
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "RestoreV2_1"
+          }
+        }
+        node_def {
+          name: "NoOp"
+          op: "NoOp"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "NoOp"
+          }
+        }
+        node_def {
+          name: "Identity_8"
+          op: "Identity"
+          input: "file_prefix"
+          input: "^AssignVariableOp"
+          input: "^AssignVariableOp_1"
+          input: "^AssignVariableOp_2"
+          input: "^AssignVariableOp_3"
+          input: "^AssignVariableOp_4"
+          input: "^AssignVariableOp_5"
+          input: "^AssignVariableOp_6"
+          input: "^AssignVariableOp_7"
+          input: "^NoOp"
+          device: "/device:CPU:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity_8"
+          }
+        }
+        node_def {
+          name: "Identity_9"
+          op: "Identity"
+          input: "Identity_8:output:0"
+          input: "^AssignVariableOp"
+          input: "^AssignVariableOp_1"
+          input: "^AssignVariableOp_2"
+          input: "^AssignVariableOp_3"
+          input: "^AssignVariableOp_4"
+          input: "^AssignVariableOp_5"
+          input: "^AssignVariableOp_6"
+          input: "^AssignVariableOp_7"
+          input: "^RestoreV2"
+          input: "^RestoreV2_1"
+          attr {
+            key: "T"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity_9"
+          }
+        }
+        ret {
+          key: "identity_9"
+          value: "Identity_9:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "AssignVariableOp"
+          value: "AssignVariableOp"
+        }
+        control_ret {
+          key: "AssignVariableOp_1"
+          value: "AssignVariableOp_1"
+        }
+        control_ret {
+          key: "AssignVariableOp_2"
+          value: "AssignVariableOp_2"
+        }
+        control_ret {
+          key: "AssignVariableOp_3"
+          value: "AssignVariableOp_3"
+        }
+        control_ret {
+          key: "AssignVariableOp_4"
+          value: "AssignVariableOp_4"
+        }
+        control_ret {
+          key: "AssignVariableOp_5"
+          value: "AssignVariableOp_5"
+        }
+        control_ret {
+          key: "AssignVariableOp_6"
+          value: "AssignVariableOp_6"
+        }
+        control_ret {
+          key: "AssignVariableOp_7"
+          value: "AssignVariableOp_7"
+        }
+        control_ret {
+          key: "RestoreV2"
+          value: "RestoreV2"
+        }
+        control_ret {
+          key: "RestoreV2_1"
+          value: "RestoreV2_1"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "file_prefix"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 5
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 6
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 7
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 8
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_sequential_layer_call_fn_6629"
+          input_arg {
+            name: "input_1"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "unknown"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_0"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_1"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_2"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+          control_output: "StatefulPartitionedCall"
+        }
+        node_def {
+          name: "StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "input_1"
+          input: "unknown"
+          input: "unknown_0"
+          input: "unknown_1"
+          input: "unknown_2"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_INT32
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+                i: 3
+                i: 4
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_sequential_layer_call_and_return_conditional_losses_6618"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "StatefulPartitionedCall:output:0"
+          input: "^StatefulPartitionedCall"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "StatefulPartitionedCall"
+          value: "StatefulPartitionedCall"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "input_1"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_sequential_layer_call_and_return_conditional_losses_6587"
+          input_arg {
+            name: "input_1"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "dense_6555"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_6557"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_1_6581"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_1_6583"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+          control_output: "dense/StatefulPartitionedCall"
+          control_output: "dense_1/StatefulPartitionedCall"
+        }
+        node_def {
+          name: "dense/StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "input_1"
+          input: "dense_6555"
+          input: "dense_6557"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_INT32
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_dense_layer_call_and_return_conditional_losses_6544"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "dense_1/StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "dense/StatefulPartitionedCall:output:0"
+          input: "dense_1_6581"
+          input: "dense_1_6583"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_FLOAT
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense_1/StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "dense_1/StatefulPartitionedCall:output:0"
+          input: "^dense/StatefulPartitionedCall"
+          input: "^dense_1/StatefulPartitionedCall"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "dense/StatefulPartitionedCall"
+          value: "dense/StatefulPartitionedCall"
+        }
+        control_ret {
+          key: "dense_1/StatefulPartitionedCall"
+          value: "dense_1/StatefulPartitionedCall"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "input_1"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_sequential_layer_call_and_return_conditional_losses_6618"
+          input_arg {
+            name: "inputs"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "dense_6607"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_6609"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_1_6612"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_1_6614"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+          control_output: "dense/StatefulPartitionedCall"
+          control_output: "dense_1/StatefulPartitionedCall"
+        }
+        node_def {
+          name: "dense/StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "inputs"
+          input: "dense_6607"
+          input: "dense_6609"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_INT32
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_dense_layer_call_and_return_conditional_losses_6544"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "dense_1/StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "dense/StatefulPartitionedCall:output:0"
+          input: "dense_1_6612"
+          input: "dense_1_6614"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_FLOAT
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense_1/StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "dense_1/StatefulPartitionedCall:output:0"
+          input: "^dense/StatefulPartitionedCall"
+          input: "^dense_1/StatefulPartitionedCall"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "dense/StatefulPartitionedCall"
+          value: "dense/StatefulPartitionedCall"
+        }
+        control_ret {
+          key: "dense_1/StatefulPartitionedCall"
+          value: "dense_1/StatefulPartitionedCall"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "inputs"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_sequential_layer_call_fn_6656"
+          input_arg {
+            name: "input_1"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "unknown"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_0"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_1"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_2"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+          control_output: "StatefulPartitionedCall"
+        }
+        node_def {
+          name: "StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "input_1"
+          input: "unknown"
+          input: "unknown_0"
+          input: "unknown_1"
+          input: "unknown_2"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_INT32
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+                i: 3
+                i: 4
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_sequential_layer_call_and_return_conditional_losses_6645"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "StatefulPartitionedCall:output:0"
+          input: "^StatefulPartitionedCall"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "StatefulPartitionedCall"
+          value: "StatefulPartitionedCall"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "input_1"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_dense_1_layer_call_and_return_conditional_losses_6764"
+          input_arg {
+            name: "inputs"
+            type: DT_FLOAT
+          }
+          input_arg {
+            name: "matmul_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "biasadd_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+        }
+        node_def {
+          name: "MatMul/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "matmul_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 100
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "MatMul/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "MatMul"
+          op: "MatMul"
+          input: "inputs"
+          input: "MatMul/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "MatMul"
+          }
+        }
+        node_def {
+          name: "BiasAdd/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "biasadd_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "BiasAdd/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "BiasAdd"
+          op: "BiasAdd"
+          input: "MatMul:product:0"
+          input: "BiasAdd/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "BiasAdd"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "BiasAdd:output:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 100
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 100
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "inputs"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_dense_layer_call_fn_6754"
+          input_arg {
+            name: "inputs"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "unknown"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_0"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+          control_output: "StatefulPartitionedCall"
+        }
+        node_def {
+          name: "StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "inputs"
+          input: "unknown"
+          input: "unknown_0"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_INT32
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_dense_layer_call_and_return_conditional_losses_6544"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "StatefulPartitionedCall:output:0"
+          input: "^StatefulPartitionedCall"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "StatefulPartitionedCall"
+          value: "StatefulPartitionedCall"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "inputs"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference__traced_save_6824"
+          input_arg {
+            name: "file_prefix"
+            type: DT_STRING
+          }
+          input_arg {
+            name: "savev2_dense_kernel_read_readvariableop"
+            type: DT_FLOAT
+          }
+          input_arg {
+            name: "savev2_dense_bias_read_readvariableop"
+            type: DT_FLOAT
+          }
+          input_arg {
+            name: "savev2_dense_1_kernel_read_readvariableop"
+            type: DT_FLOAT
+          }
+          input_arg {
+            name: "savev2_dense_1_bias_read_readvariableop"
+            type: DT_FLOAT
+          }
+          input_arg {
+            name: "savev2_total_read_readvariableop"
+            type: DT_FLOAT
+          }
+          input_arg {
+            name: "savev2_count_read_readvariableop"
+            type: DT_FLOAT
+          }
+          input_arg {
+            name: "savev2_total_1_read_readvariableop"
+            type: DT_FLOAT
+          }
+          input_arg {
+            name: "savev2_count_1_read_readvariableop"
+            type: DT_FLOAT
+          }
+          input_arg {
+            name: "savev2_1_const"
+            type: DT_STRING
+          }
+          output_arg {
+            name: "identity_1"
+            type: DT_STRING
+          }
+          is_stateful: true
+          control_output: "MergeV2Checkpoints"
+          control_output: "SaveV2"
+          control_output: "SaveV2_1"
+        }
+        node_def {
+          name: "StaticRegexFullMatch"
+          op: "StaticRegexFullMatch"
+          input: "file_prefix"
+          device: "/device:CPU:*"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          attr {
+            key: "pattern"
+            value {
+              s: "^s3://.*"
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "StaticRegexFullMatch"
+          }
+        }
+        node_def {
+          name: "Const"
+          op: "Const"
+          device: "/device:CPU:*"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_STRING
+                tensor_shape {
+                }
+                string_val: ".part"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Const"
+          }
+        }
+        node_def {
+          name: "Const_1"
+          op: "Const"
+          device: "/device:CPU:*"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_STRING
+                tensor_shape {
+                }
+                string_val: "_temp_6f1e5fef49bb4c06ace07a8a95dfbb1b/part"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Const_1"
+          }
+        }
+        node_def {
+          name: "Select"
+          op: "Select"
+          input: "StaticRegexFullMatch:output:0"
+          input: "Const:output:0"
+          input: "Const_1:output:0"
+          device: "/device:CPU:*"
+          attr {
+            key: "T"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Select"
+          }
+        }
+        node_def {
+          name: "StringJoin"
+          op: "StringJoin"
+          input: "file_prefix"
+          input: "Select:output:0"
+          device: "/device:CPU:*"
+          attr {
+            key: "N"
+            value {
+              i: 2
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "StringJoin"
+          }
+        }
+        node_def {
+          name: "num_shards"
+          op: "Const"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_INT32
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_INT32
+                tensor_shape {
+                }
+                int_val: 2
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "num_shards"
+          }
+        }
+        node_def {
+          name: "ShardedFilename/shard"
+          op: "Const"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_INT32
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_INT32
+                tensor_shape {
+                }
+                int_val: 0
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "ShardedFilename/shard"
+          }
+        }
+        node_def {
+          name: "ShardedFilename"
+          op: "ShardedFilename"
+          input: "StringJoin:output:0"
+          input: "ShardedFilename/shard:output:0"
+          input: "num_shards:output:0"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "ShardedFilename"
+          }
+        }
+        node_def {
+          name: "SaveV2/tensor_names"
+          op: "Const"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 8
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_STRING
+                tensor_shape {
+                  dim {
+                    size: 8
+                  }
+                }
+                string_val: "layer_with_weights-0/kernel/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "layer_with_weights-0/bias/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "layer_with_weights-1/kernel/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "layer_with_weights-1/bias/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "keras_api/metrics/0/total/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "keras_api/metrics/0/count/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "keras_api/metrics/1/total/.ATTRIBUTES/VARIABLE_VALUE"
+                string_val: "keras_api/metrics/1/count/.ATTRIBUTES/VARIABLE_VALUE"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "SaveV2/tensor_names"
+          }
+        }
+        node_def {
+          name: "SaveV2/shape_and_slices"
+          op: "Const"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 8
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_STRING
+                tensor_shape {
+                  dim {
+                    size: 8
+                  }
+                }
+                string_val: ""
+                string_val: ""
+                string_val: ""
+                string_val: ""
+                string_val: ""
+                string_val: ""
+                string_val: ""
+                string_val: ""
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "SaveV2/shape_and_slices"
+          }
+        }
+        node_def {
+          name: "SaveV2"
+          op: "SaveV2"
+          input: "ShardedFilename:filename:0"
+          input: "SaveV2/tensor_names:output:0"
+          input: "SaveV2/shape_and_slices:output:0"
+          input: "savev2_dense_kernel_read_readvariableop"
+          input: "savev2_dense_bias_read_readvariableop"
+          input: "savev2_dense_1_kernel_read_readvariableop"
+          input: "savev2_dense_1_bias_read_readvariableop"
+          input: "savev2_total_read_readvariableop"
+          input: "savev2_count_read_readvariableop"
+          input: "savev2_total_1_read_readvariableop"
+          input: "savev2_count_1_read_readvariableop"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "dtypes"
+            value {
+              list {
+                type: DT_FLOAT
+                type: DT_FLOAT
+                type: DT_FLOAT
+                type: DT_FLOAT
+                type: DT_FLOAT
+                type: DT_FLOAT
+                type: DT_FLOAT
+                type: DT_FLOAT
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "SaveV2"
+          }
+        }
+        node_def {
+          name: "ShardedFilename_1/shard"
+          op: "Const"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_INT32
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_INT32
+                tensor_shape {
+                }
+                int_val: 1
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "ShardedFilename_1/shard"
+          }
+        }
+        node_def {
+          name: "ShardedFilename_1"
+          op: "ShardedFilename"
+          input: "StringJoin:output:0"
+          input: "ShardedFilename_1/shard:output:0"
+          input: "num_shards:output:0"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "ShardedFilename_1"
+          }
+        }
+        node_def {
+          name: "SaveV2_1/tensor_names"
+          op: "Const"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_STRING
+                tensor_shape {
+                  dim {
+                    size: 1
+                  }
+                }
+                string_val: "_CHECKPOINTABLE_OBJECT_GRAPH"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "SaveV2_1/tensor_names"
+          }
+        }
+        node_def {
+          name: "SaveV2_1/shape_and_slices"
+          op: "Const"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "value"
+            value {
+              tensor {
+                dtype: DT_STRING
+                tensor_shape {
+                  dim {
+                    size: 1
+                  }
+                }
+                string_val: ""
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "SaveV2_1/shape_and_slices"
+          }
+        }
+        node_def {
+          name: "SaveV2_1"
+          op: "SaveV2"
+          input: "ShardedFilename_1:filename:0"
+          input: "SaveV2_1/tensor_names:output:0"
+          input: "SaveV2_1/shape_and_slices:output:0"
+          input: "savev2_1_const"
+          input: "^SaveV2"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "dtypes"
+            value {
+              list {
+                type: DT_STRING
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "SaveV2_1"
+          }
+        }
+        node_def {
+          name: "MergeV2Checkpoints/checkpoint_prefixes"
+          op: "Pack"
+          input: "ShardedFilename:filename:0"
+          input: "ShardedFilename_1:filename:0"
+          input: "^SaveV2"
+          input: "^SaveV2_1"
+          device: "/device:CPU:0"
+          attr {
+            key: "N"
+            value {
+              i: 2
+            }
+          }
+          attr {
+            key: "T"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 2
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "MergeV2Checkpoints/checkpoint_prefixes"
+          }
+        }
+        node_def {
+          name: "MergeV2Checkpoints"
+          op: "MergeV2Checkpoints"
+          input: "MergeV2Checkpoints/checkpoint_prefixes:output:0"
+          input: "file_prefix"
+          input: "^SaveV2_1"
+          device: "/device:CPU:0"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "MergeV2Checkpoints"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "file_prefix"
+          input: "^MergeV2Checkpoints"
+          device: "/device:CPU:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        node_def {
+          name: "Identity_1"
+          op: "Identity"
+          input: "Identity:output:0"
+          input: "^MergeV2Checkpoints"
+          input: "^SaveV2"
+          input: "^SaveV2_1"
+          attr {
+            key: "T"
+            value {
+              type: DT_STRING
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity_1"
+          }
+        }
+        ret {
+          key: "identity_1"
+          value: "Identity_1:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+              }
+              shape {
+                dim {
+                  size: 214
+                }
+                dim {
+                  size: 100
+                }
+              }
+              shape {
+                dim {
+                  size: 100
+                }
+              }
+              shape {
+                dim {
+                  size: 100
+                }
+                dim {
+                  size: 1
+                }
+              }
+              shape {
+                dim {
+                  size: 1
+                }
+              }
+              shape {
+              }
+              shape {
+              }
+              shape {
+              }
+              shape {
+              }
+              shape {
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "MergeV2Checkpoints"
+          value: "MergeV2Checkpoints"
+        }
+        control_ret {
+          key: "SaveV2"
+          value: "SaveV2"
+        }
+        control_ret {
+          key: "SaveV2_1"
+          value: "SaveV2_1"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "file_prefix"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: 214
+                    }
+                    dim {
+                      size: 100
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: 100
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: 100
+                    }
+                    dim {
+                      size: 1
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: 1
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 5
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 6
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 7
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 8
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 9
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_sequential_layer_call_and_return_conditional_losses_6689"
+          input_arg {
+            name: "inputs"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "dense_matmul_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_biasadd_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_1_matmul_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_1_biasadd_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+        }
+        node_def {
+          name: "dense/Cast"
+          op: "Cast"
+          input: "inputs"
+          attr {
+            key: "DstT"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "SrcT"
+            value {
+              type: DT_INT32
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 214
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/Cast"
+          }
+        }
+        node_def {
+          name: "dense/MatMul/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "dense_matmul_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 214
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/MatMul/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "dense/MatMul"
+          op: "MatMul"
+          input: "dense/Cast:y:0"
+          input: "dense/MatMul/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/MatMul"
+          }
+        }
+        node_def {
+          name: "dense/BiasAdd/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "dense_biasadd_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/BiasAdd/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "dense/BiasAdd"
+          op: "BiasAdd"
+          input: "dense/MatMul:product:0"
+          input: "dense/BiasAdd/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/BiasAdd"
+          }
+        }
+        node_def {
+          name: "dense/Relu"
+          op: "Relu"
+          input: "dense/BiasAdd:output:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/Relu"
+          }
+        }
+        node_def {
+          name: "dense_1/MatMul/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "dense_1_matmul_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 100
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense_1/MatMul/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "dense_1/MatMul"
+          op: "MatMul"
+          input: "dense/Relu:activations:0"
+          input: "dense_1/MatMul/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense_1/MatMul"
+          }
+        }
+        node_def {
+          name: "dense_1/BiasAdd/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "dense_1_biasadd_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense_1/BiasAdd/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "dense_1/BiasAdd"
+          op: "BiasAdd"
+          input: "dense_1/MatMul:product:0"
+          input: "dense_1/BiasAdd/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense_1/BiasAdd"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "dense_1/BiasAdd:output:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "inputs"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_dense_layer_call_and_return_conditional_losses_6745"
+          input_arg {
+            name: "inputs"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "matmul_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "biasadd_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+        }
+        node_def {
+          name: "Cast"
+          op: "Cast"
+          input: "inputs"
+          attr {
+            key: "DstT"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "SrcT"
+            value {
+              type: DT_INT32
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 214
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Cast"
+          }
+        }
+        node_def {
+          name: "MatMul/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "matmul_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 214
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "MatMul/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "MatMul"
+          op: "MatMul"
+          input: "Cast:y:0"
+          input: "MatMul/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "MatMul"
+          }
+        }
+        node_def {
+          name: "BiasAdd/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "biasadd_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "BiasAdd/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "BiasAdd"
+          op: "BiasAdd"
+          input: "MatMul:product:0"
+          input: "BiasAdd/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "BiasAdd"
+          }
+        }
+        node_def {
+          name: "Relu"
+          op: "Relu"
+          input: "BiasAdd:output:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Relu"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "Relu:activations:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "inputs"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_dense_1_layer_call_fn_6773"
+          input_arg {
+            name: "inputs"
+            type: DT_FLOAT
+          }
+          input_arg {
+            name: "unknown"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_0"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+          control_output: "StatefulPartitionedCall"
+        }
+        node_def {
+          name: "StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "inputs"
+          input: "unknown"
+          input: "unknown_0"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_FLOAT
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "StatefulPartitionedCall:output:0"
+          input: "^StatefulPartitionedCall"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 100
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "StatefulPartitionedCall"
+          value: "StatefulPartitionedCall"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 100
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "inputs"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference__wrapped_model_6528"
+          input_arg {
+            name: "input_1"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "sequential_dense_matmul_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "sequential_dense_biasadd_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "sequential_dense_1_matmul_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "sequential_dense_1_biasadd_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+        }
+        node_def {
+          name: "sequential/dense/Cast"
+          op: "Cast"
+          input: "input_1"
+          attr {
+            key: "DstT"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "SrcT"
+            value {
+              type: DT_INT32
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 214
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "sequential/dense/Cast"
+          }
+        }
+        node_def {
+          name: "sequential/dense/MatMul/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "sequential_dense_matmul_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 214
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "sequential/dense/MatMul/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "sequential/dense/MatMul"
+          op: "MatMul"
+          input: "sequential/dense/Cast:y:0"
+          input: "sequential/dense/MatMul/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "sequential/dense/MatMul"
+          }
+        }
+        node_def {
+          name: "sequential/dense/BiasAdd/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "sequential_dense_biasadd_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "sequential/dense/BiasAdd/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "sequential/dense/BiasAdd"
+          op: "BiasAdd"
+          input: "sequential/dense/MatMul:product:0"
+          input: "sequential/dense/BiasAdd/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "sequential/dense/BiasAdd"
+          }
+        }
+        node_def {
+          name: "sequential/dense/Relu"
+          op: "Relu"
+          input: "sequential/dense/BiasAdd:output:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "sequential/dense/Relu"
+          }
+        }
+        node_def {
+          name: "sequential/dense_1/MatMul/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "sequential_dense_1_matmul_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 100
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "sequential/dense_1/MatMul/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "sequential/dense_1/MatMul"
+          op: "MatMul"
+          input: "sequential/dense/Relu:activations:0"
+          input: "sequential/dense_1/MatMul/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "sequential/dense_1/MatMul"
+          }
+        }
+        node_def {
+          name: "sequential/dense_1/BiasAdd/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "sequential_dense_1_biasadd_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "sequential/dense_1/BiasAdd/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "sequential/dense_1/BiasAdd"
+          op: "BiasAdd"
+          input: "sequential/dense_1/MatMul:product:0"
+          input: "sequential/dense_1/BiasAdd/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "sequential/dense_1/BiasAdd"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "sequential/dense_1/BiasAdd:output:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "input_1"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_dense_layer_call_and_return_conditional_losses_6544"
+          input_arg {
+            name: "inputs"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "matmul_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "biasadd_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+        }
+        node_def {
+          name: "Cast"
+          op: "Cast"
+          input: "inputs"
+          attr {
+            key: "DstT"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "SrcT"
+            value {
+              type: DT_INT32
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 214
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Cast"
+          }
+        }
+        node_def {
+          name: "MatMul/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "matmul_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 214
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "MatMul/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "MatMul"
+          op: "MatMul"
+          input: "Cast:y:0"
+          input: "MatMul/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "MatMul"
+          }
+        }
+        node_def {
+          name: "BiasAdd/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "biasadd_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "BiasAdd/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "BiasAdd"
+          op: "BiasAdd"
+          input: "MatMul:product:0"
+          input: "BiasAdd/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "BiasAdd"
+          }
+        }
+        node_def {
+          name: "Relu"
+          op: "Relu"
+          input: "BiasAdd:output:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Relu"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "Relu:activations:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "inputs"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_sequential_layer_call_and_return_conditional_losses_6601"
+          input_arg {
+            name: "input_1"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "dense_6590"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_6592"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_1_6595"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_1_6597"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+          control_output: "dense/StatefulPartitionedCall"
+          control_output: "dense_1/StatefulPartitionedCall"
+        }
+        node_def {
+          name: "dense/StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "input_1"
+          input: "dense_6590"
+          input: "dense_6592"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_INT32
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_dense_layer_call_and_return_conditional_losses_6544"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "dense_1/StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "dense/StatefulPartitionedCall:output:0"
+          input: "dense_1_6595"
+          input: "dense_1_6597"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_FLOAT
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense_1/StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "dense_1/StatefulPartitionedCall:output:0"
+          input: "^dense/StatefulPartitionedCall"
+          input: "^dense_1/StatefulPartitionedCall"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "dense/StatefulPartitionedCall"
+          value: "dense/StatefulPartitionedCall"
+        }
+        control_ret {
+          key: "dense_1/StatefulPartitionedCall"
+          value: "dense_1/StatefulPartitionedCall"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "input_1"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_sequential_layer_call_fn_6733"
+          input_arg {
+            name: "inputs"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "unknown"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_0"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_1"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_2"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+          control_output: "StatefulPartitionedCall"
+        }
+        node_def {
+          name: "StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "inputs"
+          input: "unknown"
+          input: "unknown_0"
+          input: "unknown_1"
+          input: "unknown_2"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_INT32
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+                i: 3
+                i: 4
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_sequential_layer_call_and_return_conditional_losses_6645"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "StatefulPartitionedCall:output:0"
+          input: "^StatefulPartitionedCall"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "StatefulPartitionedCall"
+          value: "StatefulPartitionedCall"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "inputs"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_sequential_layer_call_and_return_conditional_losses_6645"
+          input_arg {
+            name: "inputs"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "dense_6634"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_6636"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_1_6639"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_1_6641"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+          control_output: "dense/StatefulPartitionedCall"
+          control_output: "dense_1/StatefulPartitionedCall"
+        }
+        node_def {
+          name: "dense/StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "inputs"
+          input: "dense_6634"
+          input: "dense_6636"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_INT32
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_dense_layer_call_and_return_conditional_losses_6544"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "dense_1/StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "dense/StatefulPartitionedCall:output:0"
+          input: "dense_1_6639"
+          input: "dense_1_6641"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_FLOAT
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense_1/StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "dense_1/StatefulPartitionedCall:output:0"
+          input: "^dense/StatefulPartitionedCall"
+          input: "^dense_1/StatefulPartitionedCall"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "dense/StatefulPartitionedCall"
+          value: "dense/StatefulPartitionedCall"
+        }
+        control_ret {
+          key: "dense_1/StatefulPartitionedCall"
+          value: "dense_1/StatefulPartitionedCall"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "inputs"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570"
+          input_arg {
+            name: "inputs"
+            type: DT_FLOAT
+          }
+          input_arg {
+            name: "matmul_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "biasadd_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+        }
+        node_def {
+          name: "MatMul/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "matmul_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 100
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "MatMul/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "MatMul"
+          op: "MatMul"
+          input: "inputs"
+          input: "MatMul/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "MatMul"
+          }
+        }
+        node_def {
+          name: "BiasAdd/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "biasadd_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "BiasAdd/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "BiasAdd"
+          op: "BiasAdd"
+          input: "MatMul:product:0"
+          input: "BiasAdd/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "BiasAdd"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "BiasAdd:output:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 100
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 100
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "inputs"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_signature_wrapper_6671"
+          input_arg {
+            name: "input_1"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "unknown"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_0"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_1"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_2"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+          control_output: "StatefulPartitionedCall"
+        }
+        node_def {
+          name: "StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "input_1"
+          input: "unknown"
+          input: "unknown_0"
+          input: "unknown_1"
+          input: "unknown_2"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_INT32
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+                i: 3
+                i: 4
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference__wrapped_model_6528"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "StatefulPartitionedCall:output:0"
+          input: "^StatefulPartitionedCall"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "StatefulPartitionedCall"
+          value: "StatefulPartitionedCall"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "input_1"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_sequential_layer_call_fn_6720"
+          input_arg {
+            name: "inputs"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "unknown"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_0"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_1"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "unknown_2"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+          control_output: "StatefulPartitionedCall"
+        }
+        node_def {
+          name: "StatefulPartitionedCall"
+          op: "StatefulPartitionedCall"
+          input: "inputs"
+          input: "unknown"
+          input: "unknown_0"
+          input: "unknown_1"
+          input: "unknown_2"
+          attr {
+            key: "Tin"
+            value {
+              list {
+                type: DT_INT32
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+                type: DT_RESOURCE
+              }
+            }
+          }
+          attr {
+            key: "Tout"
+            value {
+              list {
+                type: DT_FLOAT
+              }
+            }
+          }
+          attr {
+            key: "_collective_manager_ids"
+            value {
+              list {
+              }
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "_read_only_resource_inputs"
+            value {
+              list {
+                i: 1
+                i: 2
+                i: 3
+                i: 4
+              }
+            }
+          }
+          attr {
+            key: "config_proto"
+            value {
+              s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001"
+            }
+          }
+          attr {
+            key: "f"
+            value {
+              func {
+                name: "__inference_sequential_layer_call_and_return_conditional_losses_6618"
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "StatefulPartitionedCall"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "StatefulPartitionedCall:output:0"
+          input: "^StatefulPartitionedCall"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        control_ret {
+          key: "StatefulPartitionedCall"
+          value: "StatefulPartitionedCall"
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "inputs"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+      function {
+        signature {
+          name: "__inference_sequential_layer_call_and_return_conditional_losses_6707"
+          input_arg {
+            name: "inputs"
+            type: DT_INT32
+          }
+          input_arg {
+            name: "dense_matmul_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_biasadd_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_1_matmul_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          input_arg {
+            name: "dense_1_biasadd_readvariableop_resource"
+            type: DT_RESOURCE
+          }
+          output_arg {
+            name: "identity"
+            type: DT_FLOAT
+          }
+          is_stateful: true
+        }
+        node_def {
+          name: "dense/Cast"
+          op: "Cast"
+          input: "inputs"
+          attr {
+            key: "DstT"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "SrcT"
+            value {
+              type: DT_INT32
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 214
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/Cast"
+          }
+        }
+        node_def {
+          name: "dense/MatMul/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "dense_matmul_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 214
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/MatMul/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "dense/MatMul"
+          op: "MatMul"
+          input: "dense/Cast:y:0"
+          input: "dense/MatMul/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/MatMul"
+          }
+        }
+        node_def {
+          name: "dense/BiasAdd/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "dense_biasadd_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/BiasAdd/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "dense/BiasAdd"
+          op: "BiasAdd"
+          input: "dense/MatMul:product:0"
+          input: "dense/BiasAdd/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/BiasAdd"
+          }
+        }
+        node_def {
+          name: "dense/Relu"
+          op: "Relu"
+          input: "dense/BiasAdd:output:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense/Relu"
+          }
+        }
+        node_def {
+          name: "dense_1/MatMul/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "dense_1_matmul_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 100
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense_1/MatMul/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "dense_1/MatMul"
+          op: "MatMul"
+          input: "dense/Relu:activations:0"
+          input: "dense_1/MatMul/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense_1/MatMul"
+          }
+        }
+        node_def {
+          name: "dense_1/BiasAdd/ReadVariableOp"
+          op: "ReadVariableOp"
+          input: "dense_1_biasadd_readvariableop_resource"
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          attr {
+            key: "dtype"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense_1/BiasAdd/ReadVariableOp"
+          }
+        }
+        node_def {
+          name: "dense_1/BiasAdd"
+          op: "BiasAdd"
+          input: "dense_1/MatMul:product:0"
+          input: "dense_1/BiasAdd/ReadVariableOp:value:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "dense_1/BiasAdd"
+          }
+        }
+        node_def {
+          name: "Identity"
+          op: "Identity"
+          input: "dense_1/BiasAdd:output:0"
+          attr {
+            key: "T"
+            value {
+              type: DT_FLOAT
+            }
+          }
+          attr {
+            key: "_output_shapes"
+            value {
+              list {
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+              }
+            }
+          }
+          experimental_debug_info {
+            original_node_names: "Identity"
+          }
+        }
+        ret {
+          key: "identity"
+          value: "Identity:output:0"
+        }
+        attr {
+          key: "_input_shapes"
+          value {
+            list {
+              shape {
+                dim {
+                  size: -1
+                }
+                dim {
+                  size: 214
+                }
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+              shape {
+                unknown_rank: true
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 0
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                }
+              }
+            }
+            attr {
+              key: "_user_specified_name"
+              value {
+                s: "inputs"
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 1
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 2
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 3
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+        arg_attr {
+          key: 4
+          value {
+            attr {
+              key: "_output_shapes"
+              value {
+                list {
+                  shape {
+                  }
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    versions {
+      producer: 331
+      min_consumer: 12
+    }
+  }
+  saver_def {
+    filename_tensor_name: "saver_filename:0"
+    save_tensor_name: "StatefulPartitionedCall_1:0"
+    restore_op_name: "StatefulPartitionedCall_2"
+    version: V2
+  }
+  collection_def {
+    key: "saved_model_main_op"
+    value {
+      node_list {
+        value: "NoOp"
+      }
+    }
+  }
+  signature_def {
+    key: "__saved_model_init_op"
+    value {
+      outputs {
+        key: "__saved_model_init_op"
+        value {
+          name: "NoOp"
+          tensor_shape {
+            unknown_rank: true
+          }
+        }
+      }
+    }
+  }
+  signature_def {
+    key: "serving_default"
+    value {
+      inputs {
+        key: "input_1"
+        value {
+          name: "serving_default_input_1:0"
+          dtype: DT_INT32
+          tensor_shape {
+            dim {
+              size: -1
+            }
+            dim {
+              size: 214
+            }
+          }
+        }
+      }
+      outputs {
+        key: "output_1"
+        value {
+          name: "StatefulPartitionedCall:0"
+          dtype: DT_FLOAT
+          tensor_shape {
+            dim {
+              size: -1
+            }
+            dim {
+              size: 1
+            }
+          }
+        }
+      }
+      method_name: "tensorflow/serving/predict"
+    }
+  }
+  object_graph_def {
+    nodes {
+      children {
+        node_id: 1
+        local_name: "layer_with_weights-0"
+      }
+      children {
+        node_id: 1
+        local_name: "layer-0"
+      }
+      children {
+        node_id: 2
+        local_name: "layer_with_weights-1"
+      }
+      children {
+        node_id: 2
+        local_name: "layer-1"
+      }
+      children {
+        node_id: 3
+        local_name: "optimizer"
+      }
+      children {
+        node_id: 4
+        local_name: "regularization_losses"
+      }
+      children {
+        node_id: 5
+        local_name: "variables"
+      }
+      children {
+        node_id: 6
+        local_name: "trainable_variables"
+      }
+      children {
+        node_id: 7
+        local_name: "keras_api"
+      }
+      children {
+        node_id: 8
+        local_name: "signatures"
+      }
+      children {
+        node_id: 47
+        local_name: "__call__"
+      }
+      children {
+        node_id: 48
+        local_name: "_default_save_signature"
+      }
+      children {
+        node_id: 49
+        local_name: "call_and_return_all_conditional_losses"
+      }
+      user_object {
+        identifier: "_tf_keras_sequential"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+        metadata: "{\"class_name\": \"Sequential\", \"name\": \"sequential\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"config\": {\"name\": \"sequential\", \"layers\": [{\"class_name\": \"Dense\", \"config\": {\"name\": \"dense\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 100, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}}, {\"class_name\": \"Dense\", \"config\": {\"name\": \"dense_1\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 1, \"activation\": \"linear\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}}], \"build_input_shape\": {\"class_name\": \"__tuple__\", \"items\": [null, 214]}}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 214}}}, \"build_input_shape\": {\"class_name\": \"__tuple__\", \"items\": [null, 214]}, \"is_graph_network\": false, \"keras_version\": \"2.2.4-tf\", \"backend\": \"tensorflow\", \"model_config\": {\"class_name\": \"Sequential\", \"config\": {\"name\": \"sequential\", \"layers\": [{\"class_name\": \"Dense\", \"config\": {\"name\": \"dense\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 100, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, 
\"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}}, {\"class_name\": \"Dense\", \"config\": {\"name\": \"dense_1\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 1, \"activation\": \"linear\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}}], \"build_input_shape\": {\"class_name\": \"__tuple__\", \"items\": [null, 214]}}}, \"training_config\": {\"loss\": \"mean_absolute_error\", \"metrics\": [\"mean_squared_error\"], \"weighted_metrics\": null, \"loss_weights\": null, \"sample_weight_mode\": null, \"optimizer_config\": {\"class_name\": \"Adam\", \"config\": {\"name\": \"Adam\", \"learning_rate\": 0.0003000000142492354, \"decay\": 0.0, \"beta_1\": 0.8999999761581421, \"beta_2\": 0.9990000128746033, \"epsilon\": 1e-07, \"amsgrad\": false}}}}"
+      }
+    }
+    nodes {
+      children {
+        node_id: 9
+        local_name: "kernel"
+      }
+      children {
+        node_id: 10
+        local_name: "bias"
+      }
+      children {
+        node_id: 11
+        local_name: "regularization_losses"
+      }
+      children {
+        node_id: 12
+        local_name: "variables"
+      }
+      children {
+        node_id: 13
+        local_name: "trainable_variables"
+      }
+      children {
+        node_id: 14
+        local_name: "keras_api"
+      }
+      children {
+        node_id: 50
+        local_name: "__call__"
+      }
+      children {
+        node_id: 51
+        local_name: "call_and_return_all_conditional_losses"
+      }
+      user_object {
+        identifier: "_tf_keras_layer"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+        metadata: "{\"class_name\": \"Dense\", \"name\": \"dense\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 100, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 214}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [null, 214]}}"
+      }
+    }
+    nodes {
+      children {
+        node_id: 15
+        local_name: "kernel"
+      }
+      children {
+        node_id: 16
+        local_name: "bias"
+      }
+      children {
+        node_id: 17
+        local_name: "regularization_losses"
+      }
+      children {
+        node_id: 18
+        local_name: "variables"
+      }
+      children {
+        node_id: 19
+        local_name: "trainable_variables"
+      }
+      children {
+        node_id: 20
+        local_name: "keras_api"
+      }
+      children {
+        node_id: 52
+        local_name: "__call__"
+      }
+      children {
+        node_id: 53
+        local_name: "call_and_return_all_conditional_losses"
+      }
+      user_object {
+        identifier: "_tf_keras_layer"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+        metadata: "{\"class_name\": \"Dense\", \"name\": \"dense_1\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense_1\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 1, \"activation\": \"linear\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 100}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [null, 100]}}"
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "optimizer"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 9
+        local_name: "0"
+      }
+      children {
+        node_id: 10
+        local_name: "1"
+      }
+      children {
+        node_id: 15
+        local_name: "2"
+      }
+      children {
+        node_id: 16
+        local_name: "3"
+      }
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 9
+        local_name: "0"
+      }
+      children {
+        node_id: 10
+        local_name: "1"
+      }
+      children {
+        node_id: 15
+        local_name: "2"
+      }
+      children {
+        node_id: 16
+        local_name: "3"
+      }
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 21
+        local_name: "layers"
+      }
+      children {
+        node_id: 22
+        local_name: "layer_regularization_losses"
+      }
+      children {
+        node_id: 23
+        local_name: "non_trainable_variables"
+      }
+      children {
+        node_id: 24
+        local_name: "layer_metrics"
+      }
+      children {
+        node_id: 4
+        local_name: "regularization_losses"
+      }
+      children {
+        node_id: 25
+        local_name: "metrics"
+      }
+      children {
+        node_id: 5
+        local_name: "variables"
+      }
+      children {
+        node_id: 6
+        local_name: "trainable_variables"
+      }
+      children {
+        node_id: 47
+        local_name: "__call__"
+      }
+      children {
+        node_id: 48
+        local_name: "_default_save_signature"
+      }
+      children {
+        node_id: 49
+        local_name: "call_and_return_all_conditional_losses"
+      }
+      children {
+        node_id: 49
+        local_name: "call_and_return_conditional_losses"
+      }
+      user_object {
+        identifier: "_generic_user_object"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 54
+        local_name: "serving_default"
+      }
+      user_object {
+        identifier: "signature_map"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      variable {
+        dtype: DT_FLOAT
+        shape {
+          dim {
+            size: 214
+          }
+          dim {
+            size: 100
+          }
+        }
+        trainable: true
+        name: "dense/kernel"
+      }
+    }
+    nodes {
+      variable {
+        dtype: DT_FLOAT
+        shape {
+          dim {
+            size: 100
+          }
+        }
+        trainable: true
+        name: "dense/bias"
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 9
+        local_name: "0"
+      }
+      children {
+        node_id: 10
+        local_name: "1"
+      }
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 9
+        local_name: "0"
+      }
+      children {
+        node_id: 10
+        local_name: "1"
+      }
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 26
+        local_name: "layers"
+      }
+      children {
+        node_id: 27
+        local_name: "layer_regularization_losses"
+      }
+      children {
+        node_id: 28
+        local_name: "non_trainable_variables"
+      }
+      children {
+        node_id: 29
+        local_name: "layer_metrics"
+      }
+      children {
+        node_id: 11
+        local_name: "regularization_losses"
+      }
+      children {
+        node_id: 30
+        local_name: "metrics"
+      }
+      children {
+        node_id: 12
+        local_name: "variables"
+      }
+      children {
+        node_id: 13
+        local_name: "trainable_variables"
+      }
+      children {
+        node_id: 50
+        local_name: "__call__"
+      }
+      children {
+        node_id: 51
+        local_name: "call_and_return_all_conditional_losses"
+      }
+      children {
+        node_id: 51
+        local_name: "call_and_return_conditional_losses"
+      }
+      user_object {
+        identifier: "_generic_user_object"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      variable {
+        dtype: DT_FLOAT
+        shape {
+          dim {
+            size: 100
+          }
+          dim {
+            size: 1
+          }
+        }
+        trainable: true
+        name: "dense_1/kernel"
+      }
+    }
+    nodes {
+      variable {
+        dtype: DT_FLOAT
+        shape {
+          dim {
+            size: 1
+          }
+        }
+        trainable: true
+        name: "dense_1/bias"
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 15
+        local_name: "0"
+      }
+      children {
+        node_id: 16
+        local_name: "1"
+      }
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 15
+        local_name: "0"
+      }
+      children {
+        node_id: 16
+        local_name: "1"
+      }
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 31
+        local_name: "layers"
+      }
+      children {
+        node_id: 32
+        local_name: "layer_regularization_losses"
+      }
+      children {
+        node_id: 33
+        local_name: "non_trainable_variables"
+      }
+      children {
+        node_id: 34
+        local_name: "layer_metrics"
+      }
+      children {
+        node_id: 17
+        local_name: "regularization_losses"
+      }
+      children {
+        node_id: 35
+        local_name: "metrics"
+      }
+      children {
+        node_id: 18
+        local_name: "variables"
+      }
+      children {
+        node_id: 19
+        local_name: "trainable_variables"
+      }
+      children {
+        node_id: 52
+        local_name: "__call__"
+      }
+      children {
+        node_id: 53
+        local_name: "call_and_return_all_conditional_losses"
+      }
+      children {
+        node_id: 53
+        local_name: "call_and_return_conditional_losses"
+      }
+      user_object {
+        identifier: "_generic_user_object"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 1
+        local_name: "0"
+      }
+      children {
+        node_id: 2
+        local_name: "1"
+      }
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_dict_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 36
+        local_name: "0"
+      }
+      children {
+        node_id: 37
+        local_name: "1"
+      }
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_dict_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_dict_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 38
+        local_name: "total"
+      }
+      children {
+        node_id: 39
+        local_name: "count"
+      }
+      children {
+        node_id: 40
+        local_name: "variables"
+      }
+      children {
+        node_id: 41
+        local_name: "keras_api"
+      }
+      user_object {
+        identifier: "_tf_keras_metric"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+        metadata: "{\"class_name\": \"Mean\", \"name\": \"loss\", \"dtype\": \"float32\", \"config\": {\"name\": \"loss\", \"dtype\": \"float32\"}}"
+      }
+    }
+    nodes {
+      children {
+        node_id: 42
+        local_name: "total"
+      }
+      children {
+        node_id: 43
+        local_name: "count"
+      }
+      children {
+        node_id: 44
+        local_name: "_fn_kwargs"
+      }
+      children {
+        node_id: 45
+        local_name: "variables"
+      }
+      children {
+        node_id: 46
+        local_name: "keras_api"
+      }
+      user_object {
+        identifier: "_tf_keras_metric"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+        metadata: "{\"class_name\": \"MeanMetricWrapper\", \"name\": \"mean_squared_error\", \"dtype\": \"float32\", \"config\": {\"name\": \"mean_squared_error\", \"dtype\": \"float32\", \"fn\": \"mean_squared_error\"}}"
+      }
+    }
+    nodes {
+      variable {
+        dtype: DT_FLOAT
+        shape {
+        }
+        synchronization: VARIABLE_SYNCHRONIZATION_ON_READ
+        aggregation: VARIABLE_AGGREGATION_SUM
+        name: "total"
+      }
+    }
+    nodes {
+      variable {
+        dtype: DT_FLOAT
+        shape {
+        }
+        synchronization: VARIABLE_SYNCHRONIZATION_ON_READ
+        aggregation: VARIABLE_AGGREGATION_SUM
+        name: "count"
+      }
+    }
+    nodes {
+      children {
+        node_id: 38
+        local_name: "0"
+      }
+      children {
+        node_id: 39
+        local_name: "1"
+      }
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 40
+        local_name: "variables"
+      }
+      user_object {
+        identifier: "_generic_user_object"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      variable {
+        dtype: DT_FLOAT
+        shape {
+        }
+        synchronization: VARIABLE_SYNCHRONIZATION_ON_READ
+        aggregation: VARIABLE_AGGREGATION_SUM
+        name: "total"
+      }
+    }
+    nodes {
+      variable {
+        dtype: DT_FLOAT
+        shape {
+        }
+        synchronization: VARIABLE_SYNCHRONIZATION_ON_READ
+        aggregation: VARIABLE_AGGREGATION_SUM
+        name: "count"
+      }
+    }
+    nodes {
+      user_object {
+        identifier: "trackable_dict_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 42
+        local_name: "0"
+      }
+      children {
+        node_id: 43
+        local_name: "1"
+      }
+      user_object {
+        identifier: "trackable_list_wrapper"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      children {
+        node_id: 45
+        local_name: "variables"
+      }
+      user_object {
+        identifier: "_generic_user_object"
+        version {
+          producer: 1
+          min_consumer: 1
+        }
+      }
+    }
+    nodes {
+      function {
+        concrete_functions: "__inference_sequential_layer_call_fn_6629"
+        concrete_functions: "__inference_sequential_layer_call_fn_6733"
+        concrete_functions: "__inference_sequential_layer_call_fn_6720"
+        concrete_functions: "__inference_sequential_layer_call_fn_6656"
+        function_spec {
+          fullargspec {
+            named_tuple_value {
+              name: "FullArgSpec"
+              values {
+                key: "args"
+                value {
+                  list_value {
+                    values {
+                      string_value: "self"
+                    }
+                    values {
+                      string_value: "inputs"
+                    }
+                    values {
+                      string_value: "training"
+                    }
+                    values {
+                      string_value: "mask"
+                    }
+                  }
+                }
+              }
+              values {
+                key: "varargs"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "varkw"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "defaults"
+                value {
+                  list_value {
+                    values {
+                      bool_value: false
+                    }
+                    values {
+                      none_value {
+                      }
+                    }
+                  }
+                }
+              }
+              values {
+                key: "kwonlyargs"
+                value {
+                  list_value {
+                  }
+                }
+              }
+              values {
+                key: "kwonlydefaults"
+                value {
+                  dict_value {
+                  }
+                }
+              }
+              values {
+                key: "annotations"
+                value {
+                  dict_value {
+                  }
+                }
+              }
+            }
+          }
+          is_method: true
+          input_signature {
+            none_value {
+            }
+          }
+        }
+      }
+    }
+    nodes {
+      function {
+        concrete_functions: "__inference__wrapped_model_6528"
+        function_spec {
+          fullargspec {
+            named_tuple_value {
+              name: "FullArgSpec"
+              values {
+                key: "args"
+                value {
+                  list_value {
+                  }
+                }
+              }
+              values {
+                key: "varargs"
+                value {
+                  string_value: "args"
+                }
+              }
+              values {
+                key: "varkw"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "defaults"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "kwonlyargs"
+                value {
+                  list_value {
+                  }
+                }
+              }
+              values {
+                key: "kwonlydefaults"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "annotations"
+                value {
+                  dict_value {
+                  }
+                }
+              }
+            }
+          }
+          input_signature {
+            tuple_value {
+              values {
+                tensor_spec_value {
+                  name: "input_1"
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 214
+                    }
+                  }
+                  dtype: DT_INT32
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    nodes {
+      function {
+        concrete_functions: "__inference_sequential_layer_call_and_return_conditional_losses_6689"
+        concrete_functions: "__inference_sequential_layer_call_and_return_conditional_losses_6587"
+        concrete_functions: "__inference_sequential_layer_call_and_return_conditional_losses_6707"
+        concrete_functions: "__inference_sequential_layer_call_and_return_conditional_losses_6601"
+        function_spec {
+          fullargspec {
+            named_tuple_value {
+              name: "FullArgSpec"
+              values {
+                key: "args"
+                value {
+                  list_value {
+                    values {
+                      string_value: "self"
+                    }
+                    values {
+                      string_value: "inputs"
+                    }
+                    values {
+                      string_value: "training"
+                    }
+                    values {
+                      string_value: "mask"
+                    }
+                  }
+                }
+              }
+              values {
+                key: "varargs"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "varkw"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "defaults"
+                value {
+                  list_value {
+                    values {
+                      bool_value: false
+                    }
+                    values {
+                      none_value {
+                      }
+                    }
+                  }
+                }
+              }
+              values {
+                key: "kwonlyargs"
+                value {
+                  list_value {
+                  }
+                }
+              }
+              values {
+                key: "kwonlydefaults"
+                value {
+                  dict_value {
+                  }
+                }
+              }
+              values {
+                key: "annotations"
+                value {
+                  dict_value {
+                  }
+                }
+              }
+            }
+          }
+          is_method: true
+          input_signature {
+            none_value {
+            }
+          }
+        }
+      }
+    }
+    nodes {
+      function {
+        concrete_functions: "__inference_dense_layer_call_fn_6754"
+        function_spec {
+          fullargspec {
+            named_tuple_value {
+              name: "FullArgSpec"
+              values {
+                key: "args"
+                value {
+                  list_value {
+                    values {
+                      string_value: "self"
+                    }
+                    values {
+                      string_value: "inputs"
+                    }
+                  }
+                }
+              }
+              values {
+                key: "varargs"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "varkw"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "defaults"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "kwonlyargs"
+                value {
+                  list_value {
+                  }
+                }
+              }
+              values {
+                key: "kwonlydefaults"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "annotations"
+                value {
+                  dict_value {
+                  }
+                }
+              }
+            }
+          }
+          is_method: true
+          input_signature {
+            none_value {
+            }
+          }
+        }
+      }
+    }
+    nodes {
+      function {
+        concrete_functions: "__inference_dense_layer_call_and_return_conditional_losses_6745"
+        function_spec {
+          fullargspec {
+            named_tuple_value {
+              name: "FullArgSpec"
+              values {
+                key: "args"
+                value {
+                  list_value {
+                    values {
+                      string_value: "self"
+                    }
+                    values {
+                      string_value: "inputs"
+                    }
+                  }
+                }
+              }
+              values {
+                key: "varargs"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "varkw"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "defaults"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "kwonlyargs"
+                value {
+                  list_value {
+                  }
+                }
+              }
+              values {
+                key: "kwonlydefaults"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "annotations"
+                value {
+                  dict_value {
+                  }
+                }
+              }
+            }
+          }
+          is_method: true
+          input_signature {
+            none_value {
+            }
+          }
+        }
+      }
+    }
+    nodes {
+      function {
+        concrete_functions: "__inference_dense_1_layer_call_fn_6773"
+        function_spec {
+          fullargspec {
+            named_tuple_value {
+              name: "FullArgSpec"
+              values {
+                key: "args"
+                value {
+                  list_value {
+                    values {
+                      string_value: "self"
+                    }
+                    values {
+                      string_value: "inputs"
+                    }
+                  }
+                }
+              }
+              values {
+                key: "varargs"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "varkw"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "defaults"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "kwonlyargs"
+                value {
+                  list_value {
+                  }
+                }
+              }
+              values {
+                key: "kwonlydefaults"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "annotations"
+                value {
+                  dict_value {
+                  }
+                }
+              }
+            }
+          }
+          is_method: true
+          input_signature {
+            none_value {
+            }
+          }
+        }
+      }
+    }
+    nodes {
+      function {
+        concrete_functions: "__inference_dense_1_layer_call_and_return_conditional_losses_6764"
+        function_spec {
+          fullargspec {
+            named_tuple_value {
+              name: "FullArgSpec"
+              values {
+                key: "args"
+                value {
+                  list_value {
+                    values {
+                      string_value: "self"
+                    }
+                    values {
+                      string_value: "inputs"
+                    }
+                  }
+                }
+              }
+              values {
+                key: "varargs"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "varkw"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "defaults"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "kwonlyargs"
+                value {
+                  list_value {
+                  }
+                }
+              }
+              values {
+                key: "kwonlydefaults"
+                value {
+                  none_value {
+                  }
+                }
+              }
+              values {
+                key: "annotations"
+                value {
+                  dict_value {
+                  }
+                }
+              }
+            }
+          }
+          is_method: true
+          input_signature {
+            none_value {
+            }
+          }
+        }
+      }
+    }
+    nodes {
+      bare_concrete_function {
+        concrete_function_name: "__inference_signature_wrapper_6671"
+        argument_keywords: "input_1"
+        allowed_positional_arguments: 1
+      }
+    }
+    concrete_functions {
+      key: "__inference__wrapped_model_6528"
+      value {
+        bound_inputs: 9
+        bound_inputs: 10
+        bound_inputs: 15
+        bound_inputs: 16
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "input_1"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 214
+                      }
+                    }
+                    dtype: DT_INT32
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          dict_value {
+            fields {
+              key: "output_1"
+              value {
+                tensor_spec_value {
+                  name: "output_1"
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 1
+                    }
+                  }
+                  dtype: DT_FLOAT
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_dense_1_layer_call_and_return_conditional_losses_6764"
+      value {
+        bound_inputs: 15
+        bound_inputs: 16
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "inputs"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 100
+                      }
+                    }
+                    dtype: DT_FLOAT
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          tuple_value {
+            values {
+              tensor_spec_value {
+                name: "0"
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+                dtype: DT_FLOAT
+              }
+            }
+            values {
+              list_value {
+              }
+            }
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_dense_1_layer_call_fn_6773"
+      value {
+        bound_inputs: 15
+        bound_inputs: 16
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "inputs"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 100
+                      }
+                    }
+                    dtype: DT_FLOAT
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          tensor_spec_value {
+            shape {
+              dim {
+                size: -1
+              }
+              dim {
+                size: 1
+              }
+            }
+            dtype: DT_FLOAT
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_dense_layer_call_and_return_conditional_losses_6745"
+      value {
+        bound_inputs: 9
+        bound_inputs: 10
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "inputs"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 214
+                      }
+                    }
+                    dtype: DT_INT32
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          tuple_value {
+            values {
+              tensor_spec_value {
+                name: "0"
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 100
+                  }
+                }
+                dtype: DT_FLOAT
+              }
+            }
+            values {
+              list_value {
+              }
+            }
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_dense_layer_call_fn_6754"
+      value {
+        bound_inputs: 9
+        bound_inputs: 10
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "inputs"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 214
+                      }
+                    }
+                    dtype: DT_INT32
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          tensor_spec_value {
+            shape {
+              dim {
+                size: -1
+              }
+              dim {
+                size: 100
+              }
+            }
+            dtype: DT_FLOAT
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_sequential_layer_call_and_return_conditional_losses_6587"
+      value {
+        bound_inputs: 9
+        bound_inputs: 10
+        bound_inputs: 15
+        bound_inputs: 16
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "input_1"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 214
+                      }
+                    }
+                    dtype: DT_INT32
+                  }
+                }
+                values {
+                  bool_value: true
+                }
+                values {
+                  none_value {
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          tuple_value {
+            values {
+              tensor_spec_value {
+                name: "0"
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+                dtype: DT_FLOAT
+              }
+            }
+            values {
+              list_value {
+              }
+            }
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_sequential_layer_call_and_return_conditional_losses_6601"
+      value {
+        bound_inputs: 9
+        bound_inputs: 10
+        bound_inputs: 15
+        bound_inputs: 16
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "input_1"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 214
+                      }
+                    }
+                    dtype: DT_INT32
+                  }
+                }
+                values {
+                  bool_value: false
+                }
+                values {
+                  none_value {
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          tuple_value {
+            values {
+              tensor_spec_value {
+                name: "0"
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+                dtype: DT_FLOAT
+              }
+            }
+            values {
+              list_value {
+              }
+            }
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_sequential_layer_call_and_return_conditional_losses_6689"
+      value {
+        bound_inputs: 9
+        bound_inputs: 10
+        bound_inputs: 15
+        bound_inputs: 16
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "inputs"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 214
+                      }
+                    }
+                    dtype: DT_INT32
+                  }
+                }
+                values {
+                  bool_value: true
+                }
+                values {
+                  none_value {
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          tuple_value {
+            values {
+              tensor_spec_value {
+                name: "0"
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+                dtype: DT_FLOAT
+              }
+            }
+            values {
+              list_value {
+              }
+            }
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_sequential_layer_call_and_return_conditional_losses_6707"
+      value {
+        bound_inputs: 9
+        bound_inputs: 10
+        bound_inputs: 15
+        bound_inputs: 16
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "inputs"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 214
+                      }
+                    }
+                    dtype: DT_INT32
+                  }
+                }
+                values {
+                  bool_value: false
+                }
+                values {
+                  none_value {
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          tuple_value {
+            values {
+              tensor_spec_value {
+                name: "0"
+                shape {
+                  dim {
+                    size: -1
+                  }
+                  dim {
+                    size: 1
+                  }
+                }
+                dtype: DT_FLOAT
+              }
+            }
+            values {
+              list_value {
+              }
+            }
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_sequential_layer_call_fn_6629"
+      value {
+        bound_inputs: 9
+        bound_inputs: 10
+        bound_inputs: 15
+        bound_inputs: 16
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "input_1"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 214
+                      }
+                    }
+                    dtype: DT_INT32
+                  }
+                }
+                values {
+                  bool_value: true
+                }
+                values {
+                  none_value {
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          tensor_spec_value {
+            shape {
+              dim {
+                size: -1
+              }
+              dim {
+                size: 1
+              }
+            }
+            dtype: DT_FLOAT
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_sequential_layer_call_fn_6656"
+      value {
+        bound_inputs: 9
+        bound_inputs: 10
+        bound_inputs: 15
+        bound_inputs: 16
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "input_1"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 214
+                      }
+                    }
+                    dtype: DT_INT32
+                  }
+                }
+                values {
+                  bool_value: false
+                }
+                values {
+                  none_value {
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          tensor_spec_value {
+            shape {
+              dim {
+                size: -1
+              }
+              dim {
+                size: 1
+              }
+            }
+            dtype: DT_FLOAT
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_sequential_layer_call_fn_6720"
+      value {
+        bound_inputs: 9
+        bound_inputs: 10
+        bound_inputs: 15
+        bound_inputs: 16
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "inputs"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 214
+                      }
+                    }
+                    dtype: DT_INT32
+                  }
+                }
+                values {
+                  bool_value: true
+                }
+                values {
+                  none_value {
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          tensor_spec_value {
+            shape {
+              dim {
+                size: -1
+              }
+              dim {
+                size: 1
+              }
+            }
+            dtype: DT_FLOAT
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_sequential_layer_call_fn_6733"
+      value {
+        bound_inputs: 9
+        bound_inputs: 10
+        bound_inputs: 15
+        bound_inputs: 16
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+                values {
+                  tensor_spec_value {
+                    name: "inputs"
+                    shape {
+                      dim {
+                        size: -1
+                      }
+                      dim {
+                        size: 214
+                      }
+                    }
+                    dtype: DT_INT32
+                  }
+                }
+                values {
+                  bool_value: false
+                }
+                values {
+                  none_value {
+                  }
+                }
+              }
+            }
+            values {
+              dict_value {
+              }
+            }
+          }
+        }
+        output_signature {
+          tensor_spec_value {
+            shape {
+              dim {
+                size: -1
+              }
+              dim {
+                size: 1
+              }
+            }
+            dtype: DT_FLOAT
+          }
+        }
+      }
+    }
+    concrete_functions {
+      key: "__inference_signature_wrapper_6671"
+      value {
+        bound_inputs: 9
+        bound_inputs: 10
+        bound_inputs: 15
+        bound_inputs: 16
+        canonicalized_input_signature {
+          tuple_value {
+            values {
+              tuple_value {
+              }
+            }
+            values {
+              dict_value {
+                fields {
+                  key: "input_1"
+                  value {
+                    tensor_spec_value {
+                      name: "input_1"
+                      shape {
+                        dim {
+                          size: -1
+                        }
+                        dim {
+                          size: 214
+                        }
+                      }
+                      dtype: DT_INT32
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+        output_signature {
+          dict_value {
+            fields {
+              key: "output_1"
+              value {
+                tensor_spec_value {
+                  name: "output_1"
+                  shape {
+                    dim {
+                      size: -1
+                    }
+                    dim {
+                      size: 1
+                    }
+                  }
+                  dtype: DT_FLOAT
+                }
+              }
+            }
+          }
+        }
+      }
+    }
+  }
+}
+
diff --git a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001 b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001
new file mode 100644
index 0000000000000000000000000000000000000000..98807d26ee9f40e99330ae6a5d2988c640a320ec
GIT binary patch
literal 88424
zcmWh!c{mqe7bi;yKWmgiMMaWI3*R|s5Q-F$5-o~KCA5f&60#E#rIPGRmV_*G??@_@
zN?B5tR4VOSNZMZSpYzN-&)m89nRCu(tKOK$y|sxzTm1;s6$-;cXFh^VV=}hORHKaO
zW2m$^j(RsVm;%=@45{3PJEC0BeR}}jJTxBT&a8&=Wj5<^gaG6^Y1MOO#*vo;mRK
zGVU4kmf!TQm54oH@%Xo76upp*vtq;X!Vbc{?(jpGm)}7=Z0i)iY{kn
zk~N>@Xxp1{n7_>vGj;c%=a^Dl_eTpd#ytfyxi#n`X-GcKyNL1kmJ(MDca*tki>Yf5
z;>)eNsB+dBb3>mH*Z2ZlC>xK}@BYJwcMS1qxf4c3X5iili?F*d6q)KsL7;pj$#OE`
ze!iJNMCt|Td{-KsTzr_Jeq*9`(~^qhUnSw`7YQquS(_4~2$Q{zk(N~+*padi=h@hz
zz5ixrIQTZ%X{Ceam&>uoAsKUy=HSaY+4%j7DveFCL79LkTzW*5TYAKUvk`T`RU-4S
zdP5xc?9IVL!R^F6u#iNwU&e8Ndby2(TJ(^HH~!9V<-RG3p__~$K983p|H&CJb9d`8
zZLwo($4Nbd&q^^Q{?bdZ6ljo&ABLbUri7>G>>;(v7a?zpG_^Nsfk~<7NwCW#Iy6fi
zuZ`UbBQe@oVzGw3azTN6T-*SWru{H>%_GQ)Jql4N9>lOj2EV%n!@PVUENkzC>R0kO
z^_Cij=j1}+ld}+EXNg%FcR)W^9S^z0FfPfrxgPTr2)#cc;fAar#Yp4*GP(m5mwp^alNh!V9ok|`0e%8mH9!TUAjaj52MKPqgFY!2Xg+%orq9(y`P4;%
zfF?bf8gZM9xwQ!Y+47XLu;<8>Ug&oYhQ(jk<1R5H=3LPU=AgVfaz1y-M?W_~&;51q
z)LnyLl=cNQQW}`n25F`$Qj__c@{oEgZ({1Eh~d2gag=d1CAG6}aC}}W*;T`GA9viW
z74f%4g;opXPyQgsho-{X$vUh=n>lQAt>oX(=6f>HsQQ+1viRk7eQp;vFW@$n`<0|=x@h%R7
zz*9HC(Q*k$;7nY(*p&DUWl$&m3(THyQQEzWt0k+pgPFb#A0^@hp7}C3!(k!0n{3U(Xm~Pu%jf1e!9C6tzQeHi~lYXePTW?
z4q3~T6a!P`?g_gK6;Qplp5FDDg?1eb_txwJlx_ASR^y*RPJ1P>tS=+=unbbd#88K+
zrWtb!!Liqvzb!hBuW7nXJhr(JtH=f5EHa(euy0|TrX#H&_O5SL|ajQg0(EeLE(Q+bQ5MPS)JA;xnTk+Lh4&V8tW4oC@
zn;NZ+O|*fZyLSuDXLsOl(K?KiRjN}vHV22V4sy3Q2Vqcc7|sYWrowyo2T{bWGIE$-#vK$*k
z9}%4+G1&M+18Wk5L3?!;^gal|d0YD6_Qj?6^WA1Nketl=9*slW?Lp{MuK+K6d{GxC
z;F)iJsJ$u{Q}<**=SovHENFyrHl5BEkND#+$%*_6>pZwvVTi#C9I$o44k*l&6@0GX
zFpx^3VNDuyQ>y@527{rvFai&4I0&KBXFyQlV6DB-TTq)Ei1RGp!?tEM3|a7ruH6`m
zhZh}!9JwI)*!~M-f9;};HiXGF4adBjfIr7-!IP}{_{gw~2yU)N?cWPfZ%!sksEmHY
zvq@+(Ll{?2`VG(O%KtY>
zE|5U0_tNZZFDGWZbvb>#-5K>CN8!E$CxL$zN4?+=oRdt076%KQl_1G1xR?%AJ43Ll
z+k)|&Ylk8_c}!p00z&Q{VffR+tfKHyydW!#>-S`lK8yF2u9%HqB-asT
z15LcN^(-0Z5svM)ir9N&v|jJXWT?3-mhY$pFN5ch@#8VnKUQQ*TXnEoVi|@x?f}sV
zfQ{n<@keK0tu1L>bnqlB=o|pAEyrODX9b+jFH-Lv3X}bF1+_*tD1FBk$$U{h
zIH(F&Vm@=Gb}?$-NeA<}%c+ZACY)UGhj~7D1C#>oX;Gdd7K>aYYqHl>Ka{Y-Ei2qG
zZEPD{v73j@Cymg_dnsy!H$%GcJL6qtBiA7T(&Y!b~*=cYU
z2Q$~gotQ{mB6AUA_g+D(>348f8i%5bL(tAH5Z4A9;EG!(FtL9J-nhMuf1U6eZGYF`
zs>@eUOJyg{k2r^CH0H1-?+99s=|n}bv$*DX35GX_)y1aGMZp#&K6KR!tj82AlE}vz
z?bH&z#43r`zz^wRHT-
zIN*tVZ~D`21A4Bt#BdQ?s(i8+MGk4=`Q__SM~}t$M8I9)%hBjZGxj&~VEeoX%f3Iy
zaHSZkrgIW!KhHyN^}YDLxB_P`{)8DT+iO)Wsq#|g`|-!`$Mlzw6Fw3XVVnG|aFK^E
zmWBi~Dp|%

+d4W!E+~y333|Hr9)dThX)0R^VOV zhg|1I4Vv(22j}KJM3#0-veVqQ({Gg}Bx$o4tKP=&4pD7Hf5I42c@^m3H@Rp)>c5p`c_-SGUWsgV7IPfSGVrlB%Mxn71pTYCj`DxLwAktB)Z z`^kFe`Pe9Whdi70yjFCkE4d)z!vr3zBf?!@P*g+~&y*bi@$0GlDz95m5U$F5@2Dfj zJCpg+wHzKf_7|oHtI{og_lRw-8l6xU3wG1287KETM(oXFTB`J(Vd_0WspdQw>ukk) zn%XijOc?gw1giam0Q+oeA1(iqR){v{@8Qe-y*f z(0lah@FGUDXC5xXJT0o=_H|RV$Yedg>CW zt4*RG`}0uWd=LD#>V@>;>C6kkFqzZ6jog#hW6ur}Qt~N*v`v0T4=PD8VPon@*^9gM zh+PrWWTVZOu4(41L_?^>+|j-3j-Wn2PjN2uD+I0kWI?HM7B~%_V>}Y_!Cy9#Jp22C zG+VbZN*7y*Q#22y|6L_b4T`{2Y7!5{LRjw=3+CH*l0ytf>|#V|*4S~d@<1c;%**BS z^~9;BvJ?&&WWl z9-rf?e$ON(-DYIK(3}~+=Lk`^r~#ihKfv%yJ19=dBzbzLIR9Q1Qr~fhG&H3{knb=P z)Rb&oDz=Wv7#jpl8-7DwffObM<-^TgR>Wjz5rYZIz|2T z^Dsy*Ng&%7CXmFfUZie8t6*i&9@0Oym*f?nVisTj2hEEr$eO1@B*69qq#NbHdb^`A z^Th@(D0vlQp7sDbCM1ESR|U9=C@|(?gzJK9U{`Ac%>n_*6dJ^TuXEAia4;Uy=F#g~ zBxt|1L;F9cahq5winL!tqpBMidRd%37`OrDL$9EmU~~rFl!j|w*ODQhZru6fD84g2 zL{kkOVCvhg#CvK14*U*fyEbm-^)-BOiTN2kqQqhB1}B93VRWIk6XJ?{_;j8WUAQ)g zDwjM%?M;IymUxk?>QbUh4uqmxM>87O+`|jq?r6qeMwhg|T>sS?v{hEcvyb0m@F*^< zd2$f#YZ7qLqtMfRBZ};6C1)>>XFqID!OgG$)l(!286>;qiy(ZVHSP1vbDz3LWnJj+C#ry?Il|t1aRtQ z5g=WBr?xtAFI?6%VOm~}L2Iu9*ioKIcI|jejvtVN&ZUVYBlRJ-x%xbjEM7*J^^}rz zy`PBq!&UT-FNLde1F-L#7vu)7;uQ@=FunX6aOXb?)QoqMxlId+)a^^Kpw*OY@;?aw zO4M<3=4td$Gb4w-?k3;*Ez!f?iP%(>kXaFxr1C`uC>2FR|KTT~y+NJ((J2KgL$3tu z{|%CB=dY4`b&|N$NQ_LrIsra3)Dz7ewNPm$fj;x~h;UB_NnIUf^arRPJi z{m5LlcX}$mtt-ZLG70ST?uYm}TZ5iAxrPVQw$lqu`=}tl7iY#6Vt7V2`klRpPOc1! 
zO!$cw8>+FZuabA@x=8>2cOH}emE!J-Gn6E@k<`Y6XuCEPJ1QUJq4r9=_i_rol$_14 z-_eii?qzf&-~t+7UdtZ-auWCLF2dY#r_lc3UoN&X9OKVShupjZJOedYV11Tf{ptY5 z#E-7$MGKxjaTt#udVznJ2m{~c#A`N zNW+>sus51YCW%|a%B#BM)7bZfk-Nc-*XjWOQ{H4{zX)v{sU=OD8$tYQCam{723je% zN!Afz>^K_+t_2yOa8!dFNvnpchp!pUmH!}pTY;dfqn#+VT;^PNGR(B;LUiZq3`iOX zBR7qOQTXj|(rbDNhBH5tn7qxPv|xY;Eq+wp=XMNMi%%rYDlOm~69ev3^x+7~;G6hF z(!W)RxE{%_ePKkwu9M{=Ufw3dIoXWdwAYN5|3GSrP;WJa$16=R$FF~zpF=Bow z4ZF^eE(-}d>(C2Qc1w*;+f+_^6(@p5auKPV^q7b*B+Nc-SMq(e0{O8<23WrstTI(6 z!OSs%8TXypYSmI}p0tpL@!>Qg=`&ZU^O6|bDPZTFR?ai^B3AmJ;g*J#Q;oqqAYtC{ zt3F#`rS_J|{=5iW#pLNg#sq2{@R-qjY>9EPb#QfH1p4%h&xHoi#FGed0GVL+?IWQ#Ovy=M8AmmtbyszAecwUQcFb z1;e#9*09}VZ|%o~ohada6{79jx$Aqh$g_9fx!vDaaV7s2d#A5sWWQ_JD%0D8?`HBcY!+5c|#2G|*uNPG9Q@ zKZjHk9kmT*=HD{r<4#f~MLjetTU7hEdofH92AD-K~!*X z?ZbEv-e8?Jd~Xw?_V*CY5{KdGE)K=*DYL?A56*Yc!CjjE=$1MIO=mgb>hX_hOw?nT zwnK#0Z8-_8U3KX9oFO~gu8|+T9E787sI}A2DO}YX~$k=SyQ`k$@A1tE@FP_kbN2XN3 z&Y&C04M?cjKAJP-3z0fMg%)foC2B)1^l9@*Y^k z=jlg0XU)ZL;)>94-G?rA4j>*+dPw(6Z=(3Yn3a3B0{)6mhGE``#)L&N3vUOb*^h1X zPK_5GJI$XfP~OhIcuW`{`!@3Mhz;#*)1~jHkFPuT$A_NYRzrGOd3xJ_8Eu}Mc=bt& z7Aj;UQ{`K>I61?SuHP5ScwbB5^-b=Aajsfj_lN?2elfw+=`M7~$4TtA`zMIlJ8{

3r8j?9kuMIsQx|%S`f^jW-*}C7X3% z4(lMSm5@?)0_hIt$Xk+0vK*H|m%J%(llx%6*@&2|b*)|1JV=5j)svW!AWpMhjx#hD zB{rV_;M~H|TABQj99i)Q;#ZAhnj1}ts_t@9Jk&(&TyK)1cscbf zsdF)4bZ4C+L0{yU8wbM4@RwXp+N>RdswKJj$%e!xdI7WvD7^hD#-{ zV5l+)l1A6rdr}?rRqLU}#?xrhx*{H2uVgSz=IzY=iaPk+G@Ueo3;5itgodz4PP$r@lxK*e zNf%%H?q4GqJXyd{=^G#va;WBj-*pm}7((QX$1y^#e-}xlghSZ`2VyXA86p&Yxghxn zf#I_^aOwF4u5Z&gdU;$4@s?~O9?LSBj_krwd_RS&v+Ml2;!~l^v{uXolQj<@M%Eb`p4D^X7YdIKDesBT(S;yxZb(F5 zjgS{xTVQpg7sNeGBh4ZC47H3WNy>R7!uk%=8u$+6R!h*+&5L3B@G0_v?S^-sUJ`$5 z2old8GQ*}tjOyo9sG7c+417~#Qj5w+qmd(#3mRhjl|I0lNhw5hYdp93VkHO#wJ#Jp zDbLw#aRE=k02v$_?YnU?@Xxi9e66g6&+EEKhj}45x8DNQ&Z)qJCxH9mJjhD^L*#l} zK*+qm)<>m`=~&(mbzUOa@cjfQbYN#q-IjVLaWISw=>LIbF;|(SCFUgOYXtbzuY`dg zvf#P<2eI37i4$6)!?>2sg4ab=AonGQ`F?wJKA0p+dY|M89zH)q>UNLgTD>2Uyw(IJ zEQN>W@29vCSt+cyO@q3nAx!DEBGTX}gUw>~#Kd8xAZyqU?2Vtn^vYh6VN^`2gpflyru*uO7_OEU(D(mNv%l7C7JqF#V~t2j2!`pP-iye9=8 z3ZQg_Ke4%A4=?R6lF}9bNaMGDkQ40X@GOw1!Dcw^pUouCyh!TSsMf|O>A=1#^~50i z7p4)`#Ah85hA=!y-#=IkCKj`6C#l{h=$i%QJ0gh*+H#ZDj76anf1%^|My_;kKeKgu zBI)ay0J_(mVNE8kyGUuMlM-N-}QNf#w-g5IUAn+t@V>ZY7Z<=%TM6$jGhMHE{++ z6n`WphmTLz&(Typ`-e> zyaa5w6Q-r@v7B>OEum*7lba`Bp_OVi()14~cPt;}sz1=fiAS*R-+r`c?L$e;LOgU@ zm;ajYjZ*pHxM9<4bZ^>;m-#DbG}1}8El9;v|23ohn_=Qp?~E1+Z^_d?pU@$3klgL~ zMlWI?UfS>({TF$o-A-pL8udqvM4!iX@kRJy`a@h`yo?`CAH=}&ViYyqS(_7ckEn_~ z#1~)PFt9NUtE^6>kC773kgjnZC8C z!@hZODn?96Pz zJ(pi#^@?G%zVn$Zd>={524%qSPzi0kbC3VE--0ijdWX=^6x6CLz|HTsk7`(_XtE%S z*9mS!q2?02JXnTdRgKs^_Yn?62hiByM4V1v;#I##7-2CMDn6W`D|QL-QHQfoQ=^)i z{JX&j9HKEZy#lY)?cptrqhN(-2Od9`gx$NOF)X`)DLir=zE;Q6#nMiw`)oIkwkv#> zln8cR7ja6j9o`!whVR#`!=BJ0@ChD{HdpG1u>n!<%PQB0X|7^CMd#a*!)ka$xM z&n0)kr0#q&C+!sIXfMM_!%4I%ZxfEwtp;w(Yz)W?#{iS3pw`0DeTPor{7qt1WkC;fLdGs!}|_lRQy*p&J0P# zWk-roF47yT6{L645k`tKrDkTu>!9si8Q?Ay|` z=U67%9yvg_#|mSc?*BE}WoYVu37>IM=&h)Q4r0Q%Kkz*EOrDMt{N1qQ`87QKHVx04 zorQ(5V^Du@Htw^YkJ1t=u{7=y&if%lH@3(zEx)efy-_T8F^j>R&wtTq_HHaSDM6Fv zRY>*1sE(uDDE>=^4NX(=-3v9WON}OXE8I|Koh^Ev6UN^*voXIn7%j%%=a#>|h=Lr3 
z7r!~eS&r)RY{vxRb}1UoM20{?qXyNyuS2w>0=^y+@Ztwg@GZ(|0!Kq0mxoQl_D(tU z^I)+x^9tyO55SVmt1u^xALUm~@e+Dr$?;G4cv=W*?dbvYf5(}+-QohzsuXH)Yc)Od zR1-t**kFeK0GYEx45ig=u&-SK*B{M;stbNN^2v=a^|S(i!?2YeW~A7}KBz@P2wM*RVZ&EJkgW;)4Z# zsOT!nzj=8Qqh@=M13wZ_z&(XyCPC=k5R4x-xxxCRJvh_o4&2#40YlU~Aw5=_m0uEr zDG&@(mOJC$yE*t;dn%id-$SnSm!qy%D6XCQ2P*e$q;@?#*l$0Ay&FckdRG?Lw5-O- zEiyRO@I0C4e2NjQoC|AxpMlJ)aM+;bgEudh^3s~2VAsqt!bBMBKl~-C``&`On>znl zaV!p&yyT7;)xh4Hv2bzTGH6<;1X7ul?7m}!ySMFQqIJs=pP6GAFNT6ut7y<$HGDKx z0S|2Auz5~7KJREIX|^_q>{)zjE{1slGB`6o8u!oih9N-aC$Oe2eh+N@-mGXD?P;C(yl_#yDkEGvAo zK7hSF>%rIVJPvi|;Owb-SSNM}GnI0Q{h4f{FSHRq%&#MRcizQ#_lcA|4MF7>c^Iy( zf>p}0(eAspTWzwA7GN`Lb~5;9)D-dZk#N;2Lm`u%vkM@rr?fe zLr#&;TIn=WArbH0o`UaxZ^t@=si-yTWBK3_Lbd7|aoH?HCogw8(z~5zZ!)HD6#wJx zr)lFqxkYej??rGuDM6Em)$mqW5Z`Rr3aOWZP)h1H{gq>i<^fYN@0U6jtD4~xjzRh6 zGq_{oLVV7pqm9-eF`2&}O}`6aXI=#s(ry^ByMnLPk+)5iK=BL3_`TPMQ<D07uM|~ug zqS7erTtsd}W#ORXWK3VY73Uv4jN^P&a9Xwnuj=cJtIYLjgmft~nj7(pdKgOFn}VmK zLh(iVSpM~pH!i$>fE=8mgwwSy;+i@|cG|f@^zZY>D-+IQ@beg4;3M=MWRgJK&xv!xyfL%TIw*;TNhfrytMP{>Gyp@8BU_3HI${7Ca0~a7#=vUS5%d=}RZ`Bil=H zMY9qs__c-NpFLo{hHkC-8%H6#f3?F-HSM*jJ;zvFuGAw2XD5^@fS$ zib(=rk!(O;znV#fUii_+H&f}_k-L2M%1!iqY$XxdzlV-I-9TT?cBWFj)-1~S(|dD- zNmgbSJ+Wa0v6^X%Z5C(AQ(s|v6h4h|qiR(8xFY#%GlMQ0K26(}AEL9A6lwM1Ei`p! 
z8@!HDVrvqnk>K?@^mp7koFu*hgcXBnZICi0Y9e(0?`veWg(1H<-kpux7DFt_1ys5} zg^mb~N0&+VlrH^20{7d{qb-W8yj2A$cQ1v6j0(bQ`B9Orq5P18J-L-(Mt^U0r(@sF zqjqBPG;pmhIjC6*;xBh_!yZd;N%43(WBf|y`6hqrc55SlJz@{h$k~Za0J#PekuakM>5XE;q^{W85tTflDWyeE^C-ZP`BC(Nm!DwHi(jHOCr zh3LVRdB8U`vLsrQ@9I;nlU;e3Mu?=+U3w)%q+FN(^;L%62Q$_*XB=%2nn1l`H{h+) znq)?^G##9-N0RsaBNsPY5XH$ZMDln$cX(lA?Yxj>_~y=Jfo)9`^fuQ+{fF(4<2?uF zc?(eQNk8d~i6U%!D38y(n4nL+e0SFuNSr#8PTg{xtrYLjV`-D z%LFOlmL-96LlxMLQNIw?SO*EO8ihQ`Gv(&v#)@1r zFS-x5EB0`IBmbkbZ{{)u7TZqaU%fD(5YnOPIqp zx~Y-QO@+)7?I#$%U<)1Tyhy*U6{6Mj82`&kgCZ2Sn!D~oh-y#=0?#EQ#aFYeOY>O_yTofV{uS2nCfgg4^5f!?5a3rnm(nL z8uoh9!htX3TR{n6;5=@?vcE`a>*xu#JfHThg!lig9uE0Cd`3psERn=!vbz z=xEcUFJ{XUyPc7|qE{7B$^C@&1o4(*3)&N#*)f zswf{r+n<{X_SM?cnJSZMa7ql-Ta-t&8`H>x@$&468_M*8uL}F|tO9$*ynse8>w(|P zhq=MdOt5m5VUOHVV)t(qW&`Gapu_G_v`dA@KV}RGF!~1tN-OEix`>)nCCO4k{TP8wg zt<1*7Nv)8AN;qzun^G5?^`DxKl zsP35AG`#l*{pNm$Zu_%e;aZ9HydSMes?1kI`@X6($h5SeCbNFQ?~%yx920&$L40Bfa`=BHzAsBLCf!rw$L5SfzU@ti-Ch z?7kdX_Unc&I(tD7?9{m9CLh;-`kcq<60C z@)2>l)UR2bKeOUC6|R%!ul^@q_vZRF`rhCcz1tzp$3DM9tMpoENz+X_*Xjf9zTZie zP!ew#exwg_68P@Dg|uvEH{9NHhs|?pq<1nK>GVba@f#DR_<77eV(V8yukIP5>x`$c zVwX(VwH?3c?A6k|c}*_%-ns?q4sEo?<}021?lv8(FUOys&GBWn^J!-505uE{!LO~4 z=u7KJ*1S@ce;G}w*1RTKs+&&#=2uYX+y=NldKS7rzf!j1Cz>dA)B8>z=;P&ORNioO zkH%8^V*7oxAscz&TM>BA&J5FvCHVb!zrg%6;kbWwEAgEEk8QuTinVTNq?^2dQRj?C zzN$NjRUA+0uC#W#xIvM3`cX|ulo<7W_?ODmKcJUGm3d~3C_m3pj2C-5ssdfQM(u({ z`87i7{2#fEe3_4Oo$J)AyiRO4{pB{cPIg5k^eXnDShphozHK)zUMP<1o@LSfGwZnF z1!oy2g)pMoaY zDhN_cCJ|>w^$)d$;CbXNI1T8L)i2sPW}y)&t?LKP{dIz6Vnxj9SFcD;^A?hxs>?Lk zZ-S=kUodj97`CiYBod$efcY{^oKIU3h4(xJ6hy(m8c$;PUWm~&m1TN{I3l|35yVt} z0K2I&+^5*9M7714>;EMWk0uvFmQw|p_@V|h_cYZu{D}kAYJI5Fk{~(bYDV>%QH{7S zTTnWwij=vmAbl?$b6N_8Wc(O4((qwEp^j@vZ=3?7S(VIm>1cqfWB^x?RRXdO;VVQzU5IuJk3Q@0i22kw z2F=~R!hlXLC!R?mNbM-o@L?y3_f;p4MlQnQ#iBUr0>`b)ItQ-H?3pBk^PJX$Gw|Wf zDRMpX4IK6Ag`dB^!r!V?uH?o9-c<0M%zNm^g-qEEn(e!okIf99+4hIL>p4Mm_ui{< zx#$J%k*cG!&{Y2Vr}M;JTL@Gwo3O`4ju^ib;Xh8R25FyE_TdQuUViOG!V~22KP7EE 
zv?qY1H4otSW)XZpX*^C}eStqTu84X5TZk-NW=wiIgmL1rT=*sX-8jwRDzRU23vM)7 z*G#I^rJAO`JhvhX)?C`gh_{IukH0PheO^6alN3kt8}AacF>(}I6Cr7l3%U7n3b82; zV2rGP!=TALV(9gT*;+bG5>21NM0Y-{i`&zC2NwEuHsN@Z-r)&!{kcRN5+1| zOb8mAK=K9?NQhn<{1eEb`MP$d`erge@Ysxv-1e4!ikiVDe)-C5Sty4|S;tA7|Gc&78T`;@HQcc9ree~ zNE*$3Ih|&fE{T9Os~>@1rzd>veMN6X1fas?b1?iI`I$kxadU;+s9*msH01@b>s&|Z zHK#Q2OLV~dlU%UnRXVpJW+i90JOTt7gPeog3)HLM&ia%H89%Gr3B{MwYdo99u&b$p zQ=Q3RM#XoC5g8A5U(+#eQ#$4JdE=?;W#IOxXX@UW6};&m1#Gf$#c|wTP~3WiO1L`U zfc{|yuSUQEsad0#F^eAEJqFLce*?dZKXJ_DlfbQh24*uevDe`ey)n~)_4(*S9&6f- z;>=UfcVCA(A6DekmEY1`lG^wodlJ7ndC@3l5}@o{c@*1N10J6LYMVVa;D`8Ir2B^m z%Pl_*BdZ1kAA{w5%3?*1c-L;jOkpK}4+x7;-_E?S25%$S6B`(!|L*AQrLh(QgprR1_&7u^5y znt6BoI3Yq!blWc-V6^lQN;E+ye+Xo)$8+OfB(o0FzjCvuk3h7~RJLnxJ;=YG#WtUO z!TFAPMMM%WVa01H64uua8<{TKX8D$ z+m-U>vlYp^zxMp(3q2&*{Ups+5n;OJ4x#z#A7EoSljQf`fmF9S@L2m8Q@Cy-PMq}t z&5m}1s=F-cf1HTL>SHj*dNfOM;~FR~r0~2+8l`8?M@U|S+btYX?&v&Xk<$#;JtYW6 z3$XF~ds_7(zs70z4IFY@gImUzLA0+fN@RBcmJ6dya)luKL=$&8x*bv;h4Io$Z^EPR z|3Gc-3+DCj8m>U6i8!y@M6Pe1iH?D7;BKdg3AG%Z7F&Sx2MCk8mvT1SKZ1z)e6+e} zf=_*e$h#aBRMZ40nI}QEmG~0N2LrWT>?`+F`1<`a1lbm%!{@afYad9tX9DNnGMI z4f;U;In;bKz_d-8XxJ75GLD^e>$XU!a*xDQ&sX6a3nirSCA4Smc-ofagVj=owC(QeqkmLy+B-aXN(P-gX zV%FhD=WDjo!=oCy-*z*)QsFdncC0im$)3m^k1C>yMZ)Nm9l|*Ow+MHD7@c$UIgyF3 zC+pT7;YW6skYvda_~b2xniK8{mI^K8buK-CmYyLv(lQ49>IQ2=|N92FZcL-Xq6M7M zEk`iD6pyWztKe4eDN>XpLcU#l#SY~?!qSm*L{ZKc4%p2lb`wttHcQmvj-|Iq-WW5e zlQ*T6M{4nsc`DhIH^Th)Rs_9T;)%Vt71{mb2k?=xBt3IJ*h-0``n+mp^I@UUG)V{{ zXF4HVPlr$Ys)A+y&EPEG3|pVd@Yh54!L6s3kn*;gd|9!VyOJ--XGw1W4WU-@bnD~V zei3)Lqjmydm$nx-8M zhAU-pTwdG;epZABZREbt8FMD^52n6{)7#46S)~o=9k^So^hXG{JT|BAPCSDiy}9IM z;|-J$guv~@*Mk3|E1`x&~rmtw#T6RNye zhz(WEfGM9Q;O`@PXfGAXlpmW2iw7^jGqG}5G(8$)ytFawmIArUb~B0&<8XbnBmYh7 zAntWN1MU9>z>OoT;d5X)_>V22dHs6)e~waog6;_Po*##%#!|RNRi55&euQUse#MR1 zHF zDud2bf7Aam0N#d9#%Y2We5mq>obDEb-xhgbyL*^fmYcyVn5l3L-@lQmhj+oiV>y~P zj6_Uj7TP~iCjbAJu%>d=f;dZ);OR&g>-c36|{!$13XxUa&Jf@F# z_T7W=aW`N~+6oNl9>smrC*X`jKS5Du0)$KTgNl_k``lEHcuDHl{qRpAf-PgQcl7@? 
z@IDGt3ff`cf*>k1PXrvZZAbG>*$h4;qx@|g{48n**V`viXx){XVpTOX5fZ`tc~00l zql>vNs>*+twWA+58^QfuZgkczNt`?ClXt1|!-4c)Fmr_?{ytz#|J`~I^FEvgclRlf zVs3|z&;7?6oIZ-{OSdsM-CJSXIBRUSDui^2ZkS>^gME9+mhLulrv1!vym_P(_IRG< zzu33Xpf?f3MZE!v7b^4bEOcr3pa(x9t%jw$_ri$vEqI%p1W`+rdCQ&;%nFT^+Ilqu z?$wINpu6Wf+^8~v^x843rNO8l&PtZ#Y^cFsc5y)V#DT$we|(*Y7s#!zM`^Fm+*XUD z)MT0frB9!u!-7@tCVv9%_fCP+|Ba^#d2hK1hEwRi8P`WM@M3gPc^UEjG?DD6wWJMO zC5il1F`8i?N754xk^v!m`pQur4z*6D58~c(O@7A&ip$NYhCq+Ho{FV`t8%$$Ia6FU znwc#wkO!|XmZbk`3So;vnA}ed@Y_>`bf_y*{RO|_twV8bxN8P!ud$|L0so`uOyjBg z+Aw@Fha@D4QmKR@MRE4CmV^eB(!Xdfjha*{X+lC0DhVlbMJhz(>}PFh5~Wm1lO%;w zDGHT(_xsh)2j}c_thMgvy05if-1`;4SLYD!$vh*xE#{g#B-Ym+m~F?y+)mL|G@@nI?3{%;~l88cmW;_K{^2|pf_@A+bno|%)}KaG5)#uu#fIz$#9yG+>r z;V>~_JnL~-iO!d*$RNIg``s_wMo;!C}Vjej)e{!?bpdp0nu_8g2_oXGw3Rc2on zt%XvX@hm$NnDvT{Z1(=kU=mluE|>+Po{WG^OEhJdW0|aFlW7%#t@}OZ>$<3A!Ge^VC}XTb|WVqoYRK!qd)rK{qqU< zY1>~&Is66g76!3t``kfs+XHUv>)CK|O(@q?c#l0(Ok;*`3;5_kgXmMwQrWxzsI14m z3IY#KM0uGq>pcD*{p1oUENozZaiqcYj{y$_kK5&@eHZIIYJ z=3ibmhI-qnn7?NrNz5KWW~o)ehLi@_tn5Pip7djCCmqB+x(3;JVKDix@GU8tb{>8X zb%9pazw1#T?;IPTDbBrPXLFlsE`kkJVBT>aWI_K~ zP^M-@KCNDlo!i2=C1=vvX$xKQ;;Y!VT|bw+%~->aPdE?tQ?7D)SR`B7vW?YO+K7D8 zNX+~=hPeFKm&6wCWMk5kxeZU4G_iIv8Foy87>Qjx+jlA?YL_=z?VAeu(y>H-)kM;z zDRQ5VGg;AOFEW48e*Q#o15A|V!@$`xeE#wThWj5S<*iAw`3p2jVV4m;%yO95m&uC$9Ebmgi#ml`MBwRIJ=(#(>!j-^3~<=kN*Vr^^zmo za6`(f9TR1$Ca-Wxsy=J>4@Z0RzHFq|Up%&oGH^otv&caLVN zhgxB0)nW2`bT&rs$;aJ#&bVaZ30!A9fUOA_z(zK`!+(W@{S(*HabCfo*58ONT;NPP z^n)OLnlm#xV9%N@3b+kvL)b3!JJ47SWSRXtyyG>TSe@Ap{=reiV=iTD#tnh_!X@s; zy&K^AWfjM}|AoMj=HmLj4B9`Q;{1P1X1fhlpe9O#)eKZ)eu3saddDJ$Jaz+QY&2|=is1z12kzoh0s1pWM%KUNZzl&XZ6nT;KpMpe4z|l)d?a`@Ryey{tt6s`w3Ir zCGb3L1F?{c6xY?w7}LP9@Ku?(_Um?;t4T6$9k5o~7WtPo|5|{XTVBZyx3mdqk-kLz zTrjNcxFciYKHZo5$h9fmz=X*Q;ONvs&LOdlFETL@QgkP?znku%;SVKJ5|Gbb=v6D6 zGxLM!%c)p1Cjy^N*bG|VPr{k_gP;-n6BnDU2D#`^Hm>~$wre`!!bwkH%HaF3s@(!D zHhdL`R~cEn$&Qo_oeK*`e#9m9r9zyF4=qT&CM54QAU{Wqg}MRv$d<%fF}p5epC)F( 
zjMqNURCNYof4s()z3GC-x)^e{hC=JDi?H760^e!90;@yBUP8b!nCQ9$w`NXBs&64=#;(^)Z`To`Xpuq`E(Cz<)lcx_(O7caKMrC~ zP9)zR^(KQa48!%$Ldb%~K4i>^c(QBA2;uP3P{av-HcS_JnU#|anP2M^m8!)eXea5E)|u2TyJgIP<+ z2!(j?NInFg8l>cYLmV*-olaw7b`ck;A?pb}L&jX-spa7UYEI&WpD|-eTmBOmb~;y> zKJS;b{7yT&c3FX>)t>~*&?GW?!z|M0=K|sGFo9jXr$P=u06BX21WEBTCW-g^k`ua0 z)bLRZ*goz}4%R;qYHL;t{_k>xKC8m%(9Py#@+NoT^~WDDEA0l9MV%w7hXx4qI$Kby z<0Y5y;p@TV>j+aa+sl;G z*gS*mi(LW<=~?VYP$YOSvL|swhrYiaLNa{xiS7VNR@7kD`)6)Zov!5qs)q}9}l zOv z+-C>RR}*!$a`wAvF}ancORv=q6|PP?#dge*l3wdwU{G=yH~K*k>mQs&?$u1T+gDry z-*>x_%ljHQBb6j*UJ*_zbTXl=`ye#9l#zKEtt86!7-;KT2}iU-$n5p~gdLG1an9TV zMs)Sa@dRgK5E9_M&Om;a~XfT7#_5VC8yUG|Vh zs4hCj?HiJa)jOW@XRcd;=e-@6-n5TdO?5(@B{kUCevzog{(#V^z4W_XAXM(WKp!g_ zW7PUY$Y0n6%jfjR@k;%OwlI{X!Q4`2z8`GD=$8tKrv`G5B%o zQ1IvpMwy)0^|4mNptc?GyCi~9ybCRB|3c>NHre>4k!;}ZiP+|y2-kEfQCakxRKAJD zwv4CTn7Lc9F@6@Fd%m88%}RiM9Zk@G{y;LLj~}0KYBK-LX*p5T97T4FdIAnVPmoVO zNw~5lm!v9rvlqVtnbSsR@Jm_+ind4iy4^Bn2;X4CaNwK7E(1?Gs1ZS8fN5jB=*AF5Qa$`U?w3Y0@7ptB&kQ3P zGoBNfRDBY7L5FM+9Uzg(X>ckjn={T#XOCp1V5QK5J*fp)p6|`u4|~OVZCnUj9jZ`% zy2$=&_7glmm&u&|C=;d0Vb~Tk53Uje((^T)xqRG-<*5p=EYX4X-82d`)n||gxo1$- zDT~gXhNQ(k1f%VvVX{g-b|eJC(LcdtxbqP#{ZfgUz09~1(|L6Bw}DBf+4y{9HE1f$ z2Wxj5^gN`>S4!8ws!~njG&@2lO&vs=JP*U-)+bzfN^cS}?gY6X9?Z|I)Fy44-{6sj z=g6tr-;mNd7FON)Dr;F9ha-1x!l{c|WZf0lrKKO5A?eIk(28t^HBC36_~QWD=95gm zRjwl^<{!kSp08xUv@9??bdJz-+gOQ1G+ESpBcX5+<7{?fuMykqw(ij-eO(X1{38NJ z?=B~)p9GTLUdQ*`uZG`4%}`o_TWU3&lYBKjl+}$(*r52dcz?>EA=Ha zE;whmPwto?>lcdG!oAsz$=6|89}}TzWhN>p5-7l$W(W4nN#Tlm8IBHiMEV!#t zs_!(Lj+3d;_4y-ctzio>EgeI@H#*R?)0#B=urY1At}nWj5(UMkadcrEp?>2O1ciy_ zbi&Fvq-&rFHIO~Rkd@Zp8m&&}XO5+Ve@SVn*BIJ<-%If5H;R@u^rzFUhX`-$HJO6S zbZUHT7TxsSQP2?`URKGI>BqiWbZEIHRobFQHv}!D!Pj@wx~IdaW!(kgr~g!{q!dC- z^~Tc7G-s-OONSoVQN*&+XVL$&RBB?5qq5$BzTZ88UJCG_R?}RlZ0cxwCw>ThZaa-$ zHJ>0%^L-}l*HjZmze*9ht+lA7!&YHhl^k>K?oHh$#gjRGwh1|2Y0Po1q97ODgpczU z31q-5+V`d%eQxGR^}kl&;dyTagY{FX-Z5>eFhi3T9Mz|rrn=IJwt>Qw_3qSvjXJgF zA_SIiDKy@$U{B?ph1w|tsrpPx(^~eU$C`J-_VhV)cvLr&Je*w22lb<)JOb&bVg6vW 
z^%L4Wa$=K5B?$ek^kBKX8=q<=WpO!L%>LmIQrB{wzm%nnBYtW@+>Ti=XRRD>6*HBX znhilkF_V3^Ww-G2(@^e}Z@!p<`iE1!LhTZKiROMQZ$WoV6z*2A$@)gzd5ZOKYu#SW{Ul8wE_E~ zuy!irX1Kz?d-Lec?0&3zXfR*w@RNTxZ6ICy)Km169E0~x-8iOT1I~)}pc|5mxU$T- z=y}e8rOFRwwXga!Lnihdo*oo(0_=rJK9|_#jk@f8iZ*jn)&kqiNa`te(*j;NNhhYA z5nc_xF1#u|CLEp>%qFUgVe@}iLq&oWdy>W*W=>Tp8NWWjt;AR+1ULffFla3Z>#l`Sd6m1`CV8d{F9@5MD?(x6#T zQ)|Qid*VcuPRh{~r%z;M`){)LEKosSmA>xMp-l!lbXD^QGEdQxidiaM>eY*yM7||& zw|^$f?6jy&=?F6DrUosZDL(U3Wvt71ND4D-iEF`KLOT=rZA#|C`%+!n@4{WO)K!bV zih4^rVqW5J@|zUKloFZYX>wxne8DAGg}Q1r6RU^8Lj1#@&^NO`jSe14&kcA+?rQar z)dO_s=i9uH*T<0N_V;1ui$~Kx&us;RXF62jkp`X6BcbYXH-%~G!)fi|3?R22kff2L zXxNEfFyXQx&Dz*bu1*?EEw}T;zke0kCOZb4p0*HX{TKBocCe(*{`8bO3T~1M?2Sjf z@LGw|iRRClj>!Sxuv-PO>~AKxD7XkpA}6wEt%Pc3NGQA5hQ~q|k@zQl==*u!$lcyA ziN?flB>43sqWj}I_WV|&{ZsqU$#5wDzr$i6K({^v0+vxWfe()$0OSL!JpM>b&fo# z4d{ZyuNfp55t}jp$46Gwr zck&PDnl^@8y5WS(2jvBjtTSkz4&*@pue zg5!tB*z4ppqI|so8`4Gyt--H(z52c+wYnMj;swODE1a}k7!5Y}J^0!4x0A<7^2A3x zUoSZ}ms_Jz$a$-8CZ&gZaZ6S&CmT2IBq7h2l0FL#3Hz1%NedSTa zy`#nn-p58T@0mAYLdRv1`5Z<6$%2Hnm9cDpRyue1i76?miN_!Jn@QI&Z+@YL9nrSh zOf33G67PK}Ou48CT6Ya3@>+X{P17iD-pg5JvF|wIZ&iRpI!wr~LPMf|FbVHyIg+QH zD}<$8gV^Ns!P2}wr6^gNQetW|pEj5^K(hWR;p%RC{O`a_GA`pLF>9U1wi)hYO2Zx4 z(mes}{6u+NoPMA8TC2!LOzexJgTk3~Ocs|I-hl2$D`l)H03CHtOLKCrV(i*&Z1h`Q zR_m%y%|3MU%WpIj->Y$)EItSJytfsS&)Z^9?NI!y_yo7*D6za=>TvNvEH_%rM52g{ z4DK8+sEK|QeUBRcN6rKilQx}Sua|?jY8|;vtFGaRfirP|%29T1qC1(Iwv(k>dg~a2HY6>J|GOG?cCV^cfwz?O55Q z8#wElA3OKv3Ou}&K~BtADfF&?!oSo$hWEAB6C*1FNeoSti(7CZ#&Gug^$I4O-CoW@-UA7 zRJwvy^qDG1-?V_Mg+Kb6gh5Kn2YjO1!tB*C zcZp>cQ49!FA*CA4%yQanxW4ic8}U@^w>oLEGiRNpF#&O~W!Fvc>Z#%u_~pR1ZHI`R z=1NR|Bqh4bFOlpoB`nXjH{pC9!O?&z=s4y(mMH^-|D*7F<0d$`atMj_yvCM|J3~JH zGa*7{3~7(6l;xgQrJ;WZlB#*8CC|Svz@bA-1fLJ0+o)>`s>~LV2Hy{`Qym!0G9b&_ zw20brH7LGb4RQS_v3z+0UN&D~*A}!8hrl(EaI!Bs94m5V!BU~dbw;Vj?$dN*z-Z8T z8BLble87zJLj_4W&mZ)ckiB|}#9Zl&G;Oc5n35_BDfj!6DW*@j|9|do>aXM$W9c=n zCy(+`&$!<2zsVHiR|uP$^yu8tsmvf|q~P4LPt4A>N>`hYhN;UX^y|b2uM$j332Eo8bO7y?XPkffdKrlAggdOeSxYe)$CfTZx 
z8E><3)OS<%UrIY1%&vx@fPr-P$f;Oa+8?~`6~n@V^EuD)J76bgjy5@1|FPeHAEQP51wLWSJ2uKeMwTxR;uo#`Vt0M#HPCs_@eRX&NZ-64 z%SIZX<9`)u;DvjKU_td%DDsO&8~PZ&F1-VeMz`RGMGyDfDjast&qO)D0(k3~2g>>N zplMNn`Kv|Vqpve=YSbo?ZJ%M^x&UrYS27;^e1uokTgR-t)riV@M^-(&kgNS#z%&D9 za3@AZ@)sUW!?~{O@Q}k_EdG6)tC)6xyZ>tZWIyJ|Kc9?2tfE zp6HQzW{;8QZ1J~M8Xp{YhG+$Pi@d!&F+Qos?-*i);Wsu&*GOt{gd~~|Ii-iBPYrLi zsSOV0cVmM47IX{Vjn-l7VAMu;J2kbJpxAX8f-=%@mxm#qSn?nF{;wK*UF^~3-b(T^ zayFc_b|Z<7FG0I376WDvBqwJ1lAmr@;OKIDw&=!pe(U4_GUU=Dv^YwoExYgX$CT%h z;E97+&DkZ`Eh0FNY^MVKQ_9^v@q#aZx`$jIdPi2HItgdyY$4;M1yB}I344b2=NuaH zxB-`a*$VeNV0liQFInHl-5njw;j9&=n;MeZB}cGJd`7DOnd9vQ1>$HCj0K`wMrU^t zq?}*RwI27yjq~F$XUHOS|FVHEzgY~qwO6>dxT&1Pj_{W3I;Mneqg zEBh$^S#JUw1-I~~ngNy{S7r6KxA5#AasHc)@S^KPQnfhQeeZn<;iXR4Y(}%7S}j%B}ht5ic)(1^4S{a!Dz=tss7I=7~<2#YmzZg zyiA6BHICwhs*Aj?AQ*?$9)hlZ@}$S}1a^;+u(tD`aGB`eQOVkc5+65dOw2sK)U+SyCO^aG1uMik zZ8N-j>kBG%-k@nyfRgg1CGl%Epr+F(?p5A&l;pg|@Q=Uw4u#ENF0!3%bpyEttAAqY z?6+KE^eYVitIi@-GU4ZgFbu31&zC(bK;IpzB<+Sgv0nNQeS?%poq@?mEn{G1m^3YYQv^Lmpf1sSkXbbXh8wuYy# zPs5ydifrxja&CstboAE_;~tK0nU|I=U@WH%)P-a zQ?`SS#<5`9Enq`uD(=~72rI|e^Iu-R0HH^X)vr_JZr$!cUsE-#zk3?&Gg74S-CCgd z_Xqw`EyF)*!%)*^GqY(PF3y}2o_ zU^k?rdLH4;KC3XGbp|$C*Pv#~ChjRk)U1Ai%}q;igRa`?aDGcVFX^3A;+TCME&S^-WaC|NjgICEW^G5S&+*7# z7N4PiEm~?F;dnuuKW_aAUA>jzypbj-%zF%@<9_14qAK)?NI=huIU;|hf;X>?1V`=l zkQ;r5n-K66rd6NdT0cbcE64tUNfS?S(@s@j$Kd1Q`&$5r_dj)iCO` z9E&yWkB>b2Fkk2Q{4~8+oYB8TkkoyKWzrxntm7RPdq$#0WgX0N(-XNJG zm08^#j*=UNoQYEp7ZDu_N*m*#d}B2K@WePiCHtJT&-;gXaZ5Pbt&N68(RHBsehU9| z?+`ZmYaO}_?a!`c)v&^JZ>;IqgbAM0p?hi&{CL^`^V;3Hif&K%6h43*T0dS~Q_7_O z#ns?l8#$J;=?T1V{|0YoZNn)uKfxj0@BF*TfkY}3X9B5Bxah|%E~ZhH{fnFlchzp8 z|7y|ix^FKm`LTwpRartN+`G!ZcsoMsJK+rI?O6jY;|1V(Tb!z%i{m5)P}%v1OO3E3 z$24NVIBy9t+Hnr1Tm0s4MqlEV?@L36;t}9yu7~4C1*6R?dA3OFFoxy~WwDEng2eJN zuK6|r?2|6z+wN;n=p6%ZjTHF@vSe98L=KsDF9`OG$_MVOCtEb0a*h`bSx|uvsql&= zhdd_2qN@YRo{a;^`|xlQ^`IJT3dYJznQLj$A>89a)WrN4DY4wrVKboXXaO1jDtICSGpJ0rblF2jlTG zez+3EeWzJueXT0(dQ(sQ#2jGSgA$zo;R6ZnkWiz{`{amw3$b4~mMq_=N53R$&^@Dv 
z(@4X2ddzjb4oJktrW7+W-U)FW@Gr9YXC%yK6B7+B5ku1@3CCrh~GCei= zSTdB(%$HEb1YKJCq@4{Of0rDeqDT*~>tRY)zku_Zb?82{53@X5PGArx7%eU5rYk-s zVeZu=_ErQ5EZrfKT)t1v|Mwa^?uqwy#7-h|ccr3#F@P}rqr|}dC9FPoiA>-23$R1X z0=^p(=ZrygTo~asM!~LnDKT2`PCvCWxfxH9%_K z08%^B8+wN&6RYw#vhlP%+CN@OhCdxfqPF=E5;lko+x8UR%q)hU&r_tu!P?~FJWclP zQxd7!-hf&IN0DB)!pNbkmT)-z7q(ATA^(1ho&70K;9+GYG_)Ng&Vf$k+vAI5-cMsP z)?aiTyH!Bg^?~9H*^ngMJwi60KSOHQtRiVQ3(4;aj*L0#!Vmt|j)N=rGTSdUWTg1J zr+r>BjhX?l-YkKu4%tf9m^u&-vp#H>_kPmdFABalctO^$gSsK5+6?Yw+m8fW-|uO#FU$=t*=O<2qqo75q2tK%JLaUIG=kS%z8x;86+xn+ zD~_Gf%GK0fgaj`qzD_v^R!n^hgH!tP=il10cvVBNn8JZ2uLP|&dU#J5fdv5IT?h*c8^kAk%*Ep$=fe4)m(b5U9;R1`45;2H za^Z3`ysbRV{H;#|8=`~_2TagyuQ@hGjevrugGv4v1H9V0m!tZdpnYx)i!}Cuk?Y0_ z!ISz5F>Vqvqb-uR-D{1fRU-I9`THOvB#a~#DzG2u0M&dV4Byv48qDqxyWaxwh@1qc zs&(o6v2TcW+dYDQw~4y^K)U#i*r)UVL};oUeXjL|IIqy4QSHY`-+nbj!|De)GC2tL z&8{RDeUFf5>s!dG%4nLYFEHEJMZ~WW$)43})FP>clxZoG#Y=vYX`|HWS0eH%4kp4W zrb$m)|0Dy3iXFBRHKPC4fYzHS((qFfYOUQwQci}_e-q7w$wgh{yys}<*`81Ke$WF| z(}DDg&wXOS|0L{L6I;7f%#hEyf#rX-WWi`%D*0YZ(ro)vm%06DO7Ci7eqNbw`czMj zx{P3jO*@1EdtY#d$we&a-~bvl+lpNp(nVsPnNZ8&S77h!^X%Tg!F2f4Y_h#{0vSFz zoK2qBPV!hbxe}{GhtEqV`G;arAnXAq8ku^o@H1hkA)=Zy^S zadNxvfsyWancsh(AgS~rnnW8y(ukwb8gC3n=}jEB;sU>?NbwJ?Dc3Gp697yI6 znPsRh$6X8J)4rsEujMyrdvAjh-linC@EE6%63Ndf5$Esad0?e_7dmZfP_nGIH1_Iv ze8@TB!@R3#_f(0Q`z?pny*07it%u8Vmt$>inOuzGN{AWM4Mt`OGPAo)n7i`^q{0@0v+B;qweg-troek%(@SGRn!gV>oT~4E$z^Mu`-J{?E;-v zT?|Qz!LIS@OtnRox%ZFf{3K=Qr>O}k@0W3Qb*ZAi=?w4tZ8pDsW+uiCZ-bAM9$}pJ zFpPVB0{-kafz_M(;JLq-MX&ZC-u{^v*Zf+Y-Rw>R`)%TD*}fA$tWCv&7uR7$=4;HT zas$cKMA?!3(YV|;5Ch}faYuwIDL#EroaY+DlPeymbV>B|fB1=Qj{A7eJ9BWwj+byq zRl>sE_kn(xgnjS-55t>hAW=L7b!rFT$ypx0_t}owD>Ttsy&LBSuErQ+@ilwj;w$1! 
zQQ!R_1~#`yC55JZhEWn&`RC%?<^3R~rx9KaoB~VAo?wdiKj}qn1s4D5CKr?5i+sqb z#FF!J8W{ zfLK!jBY$s&@Z-J7m%0K_8r6;K-m0<>&l1qxvl}FTZ}S_jgu~XSv0(DP7prxe2cDy@ zh}n~aY~$K`EGxKxUqTvCq3JuH9jZ*0$$rDD>~7w^QBjs(#zUah3SRPEMaFseAg9vJ zkM7rl5g+RzNy!Zq9`u9rdgrm-yc(D3uH(%<-ozJ!E`a@5hULAy_`qA4b`!d~`SW?= z_sOaQmyN09mZhpf%4h|UB;J7^YAS3&)gk!HL{F-L5!j3U@zNVS*EQxFN|toW^eUTC z(?f+Ty_CbHm|Vg76W=jqS|!Z$Ag9n%#j7`*#S@}SPW|2r zI2$z%eJgahJ>|bavY~_TSo0NTip-&7WjV&bw1A{a8Q6~>Ahq9P2jTfDq{DwQ26jrg zW`z&%drAmCHvNear`|zs86P5GpNB5a`c5rayI>M zKx}CtZkQ)#SUR0tww~zOOWMGn?>NRy`*)vvxNrg&qg^a^{zPWTap`LZn43C zJekMZjcnc>@%>4BkJbq%G4=f{mY24R4YM&~iWz;Wc8xVN`7LAThj-w|+1;2P?=NUr z9b|9&d$LjcT~YgO2rE{X0;u? z1j>T!7BGkVC$VDWHL`d6ETK1@%?_KHu<>T?*bn>BY~NmN^0Q-X(9USqF>fC!{P!OV z_Y1;*RkK;vo-Dj(cANdFTg)!K`i&n037g=XiOp^2u#ZDG|0Z=2`P=t_Fn?$Xxg^=m z`rVc%dXlNU$_s09(@UB9WR521<@`w2tU6}n&`U__I7Q;+4>9HYS8$BJ4x=HH=$V76 z!ri7R?7v@z%t|_!)hx;53l`5~6Zc!uyTOOqxvK4~YJ!ar;9$nw`kAp80r#17ffW;o z=+bcc0=0Aeh|!T&e&kVG`s@}%rCqPlJm3XaxbP%>boT`9E1yEm|JXxh4$?o*4RNB@nb_Y3+7Wf4*IS7QR* zxMHZVPg4dxxrww{K9-s$Cey*02k8jQ!!&=H8BN)JgjOp=3VUwF(?NBK>_tl=Wup&? 
zefHyYghWDzx+POhv%C1yDuQNjI7!RXuM^kDr|HN2p>%a=1eGQR(34sxXw|eM!tLJ4 zWOwgVLXvl+&`U3t-q$(L!afCnR=1(xQCLprz1kt{fBTpW(so6Q<2P9G$6Z3zv3>O5 zlUN%4?G$}1&Y|Tx?~>k!j?u6uDKt9l43(@6p${Gg)2^xzGUak0y(T?HpYs|-|M6mC z;rfa$Q7>V)by8{G#!kV$brYZX;{-i+?ErNhca^-kau5eUiKh(@$FP5+2GEQrj&zfb zE}fSDnuIE7(;3~O&*tM)I?s9>Rmn0Y1Ez};z%vu*=~sQJ_km%Q+dhsO{+x~v8>CeC z#VlI=!I+<5;7nVz<_M>)b?8513wonZj&R7vgw8fsq}(DyI@_r)ZTGh#11=Axt3t-n zm$&l>x5Y>Ru%dhZy3q8IiR^a!8{FG5j)ql_rmyE2(=YSwY2OFK>GH4?a`io>WfKjB zLf4se(z86`@@))#>S{n2M>x}{HN%Lt;UKzf_&1sQJ`<`wW)5x2oW_#U2UD#!Ju2y| zLH9P9Q-^D!|6rTBpt9ij=GEsv?BG?@#y6 zu_v-j58?H*GNw1zfx5_#qHhAE^heGnYHU4)*_;_jXEdwQ24X~)SW_CldKis8=1g@u zG--a~Wa{l^B3vDIgUoX3L)|=YvVoR{G@>+$rEuHf`jwINzqq7 z(*?F=<`y=3uo=sCl4rjPvM@Pp9Tvb6=BY4_d$emNW-keZPLq}FQ1*SisxxnmOAw!Y%_4eMUV1PHN=(stJ{e;yzcSt zM^e}+ugQFHhXXp+>5y&J(?l1l=sESV#cAzFh>8A2XkL_I*JQ6=GQ#}>T1ywO%$qOy zpo8yOPGB@n`C7_eUOIp#dE-gXj0r5z*O-mbti*ker(j{(M_GqwC1w|G##>PpDDic} z-ir;GWD-fmT%2GPWH!=Um8OCS_=qQ`OcPRSYzv-Mw}(OMg}~-z>O$SFTGpQ zNPZvq1pZwQh^@~-@K*W-uD7R?nmx~CyVo(iel1xxO?xBB@AM;y%9VC<*Q)WoMG<$% zZX&DJx`eZ~OX%p!T^Q7I8!z<<70S5%Y;<)c_;gY5d3jm7*Cj-FcTtN|n%ydNzd(fW z`)om*KEiZO+@UJZwtt<(dV7dj>VNfk_x2mq^VDNocG|Q2b1uwZ_5{n8DDpkJ&dg@a zWOm?q3JypntWjjpCJnG7;dKjHo=+e?j$Q?en@q4UF$n9@G}*Os1Dq57oa`67{CRJ! 
zL>{7=|E80wQnHOMTW3Jb2uAubR~8Wd7#U6U2Cg<5qOCwd(ReX=J@=QQG2Sr)tWN8}=vZOHv1BR2T&d}gMbNFI$Z z!k5j9*>yJ;Rv4AWYLvCvh<_HuAy~$qxTT4Eg)2m*^B_y_Gl^}OBV~)TqnK%YDBF}R zWln#6*o0X-LDTCjd(bzGsod-)8g5~%m?kpt^+(4P#2w3f2=p5 z>7ko2t^6Mr%q`-b=M`d#&L7?<{2dB>2*xc*MMo1`_&e?y zeDVRUmZ*}rKq;n7{RL}_FF}=d70ieh@4z*-Sw-+^u?O&v@j|S+^8}vI4^XyVLgMR+xayw!2k^{`N+L>+Aeak{~=ag=$vta?hYGIwhuCSc3x7sQ>^5vV zO`iK5W1rW|V<7{!6NiOSC<*gtQL`&pMEWYWI3$YoI&W0EbdR;r_x1&r)T@zXO*z2w zuViCpX&GDO_KwMQEM&v=9ZSFOe#1Twtj5B3j~SVBl(if97s z3CLu9yWZfY1J~i{?OxRGn~Y4!sbm^suhX>(@xt|rMAB7rQTThHisHZgs*qrRQw%>JMzG_C0nb{wOv0SC8TCN@s%UA9g(6A$wG9;hUFQ21 z^`O7|57auWPGUDYLD{da3hMR5GT!I0BO zk+r#;<>ubM36=pHc$40)z?o};aoaPPafS!YKP$MTsmEbi&H>p`*fNPS#kot$o~`_+siQU z>vLIaw;~S9I1gKx^@MoICd_7=L;X%Lh+FCUL*t7G24%1P7N>(?u>^3))h zm~6nbV+M1jZkhOCmOQhJw`W-;O6 !xswBeIdG!<4 zaEZ#Z8|mtdQ+mcf^5_d1yblgL&Ow(_G2ZfjY1ia4oh=%=8SpovqmmoyMf7HDf)`t) z(gR1&eZUL5Zpf4$U6N)lFkt%2joCH9m<(T_%pNlFp3_zvm^nZb9u-HTF^l9cmAIf+ zAdmlu3|=YNz{4fiA$d+OT;VZ}c&#+Y4ar^*qqDKZue};Oza+r&$TR4S)@*m%R#=_> zms8a3;T66~$W*o(l5Y*b_fymHXqPU1pj#3V9@z9+?V?l~w-IU?TS(}@!n=&_AHI;>^+NYt_2$TsCh zu^{=;Y;oHbwpULMhXilr4HOTtT-&{DS-Bh8S}~B_DiPq)g0-yPLmiIfG|N7G7{Z>W z?qVKdW^Gv^_TRr3@jANN?9Ke0csuDkcc@gCjM+YfJ$&{CC38|aonO63=yfw3?y{9V zQJ#R_TgTz&$*Z{Rn-4^&1a&7;`ro>*q-a}TTR#k&N)i+AX#H$b@54}36~y|Xib zb54t3$4@U@5j%*<``M7x8W*zrEl2K1(@EKQN|^LDfa@3Zd$Oj zqc36k5<50|&qB6;?Qjfwx&^+_cN=<$=Lg^4ZD{R3wvshVv_r8X73gUrQ`ZY z6;40pPRe;QrzROD#Z5*1u_xJ*|CCu*_*tBb$zcD*K$@~J6Oz8ELqJXh6hyZ|M#31F z)c-nYKHbj6-$@3);4`S-bq=>m#qaN|f&8mGGSG9n2yF-I(6jO`Jav&HiM7G7Or{UM zk4JMlKK3AqO13o(m4VNuleqA+gq_dPd9;sZoCO8ug5u%M|oDV-fFyO-Y=+1Os} zjIx}_%Jsq0Ew{Kuz0>*9L6c;E@Eh;!M?-V6RbuIal<-xceWJ z8uY~vZ{9*qgM>Ya71_nTg%F&qfk_*mLV3+YzHMoLn6`H*jEg#fjobHQ;JP>*e7!$D z*^>kQKgL4Zz~d0#ubB4@o(55d3(?gt3}dAL^%1YRY1>buqiPGUdHFZo+wd3n^~!^? 
z(YL^)>McZlIu0R!IMCJYL4(|Cp1f2>KBf%+HavrsBd=jf&I74rMyc$hraEzQ{e_a= zzoo?woY2)mfmrHYfMxV2-&Xh?^v4&YME;{x?}wOw>${3SKLSg74G=q>p}ge39T@Hs z3S-Ck;?^~h@Qcd@|N3?Oz3q=7d~^i(J{^Mh;xE9Dxq)aHejMdWtg)`YI;lM|1FEu4 zL)+d4czEU+pR?E(dQMkktC$O4`nv{w-yGxJ&;Nj9b&ufc*;HuWx)g4{u!RD@T5!J- ziESSW`T8@9p#D~a^rnTFZ}+>$xu;y?Iy~|qYe4T@m{rkAMjd#IaV>Oq1U%Xd3NCD@S zDB`kcDR}g`iOW)kb4L6Ba5}Ss_~zq(VM(4UiMsX(+oo{b9C?@qrMU%BQna0P!W=% zRK|VIm1HChDU_sADMDJ(`rY6EUS8MB9p^dcbKY+cPO48}HC&ucKjto>s@e7=;Pf4m zSFxEB>6IgOX^l*Ahb$^AHlY%>wdC)y4RmN_HW^4eLTr2!E411lGDDiv=nLH{>X`AE zGYW~PZo%hCWr1A9IV>SLwTX1u8^PDvmP-zqD`NQ{HENTyiahidayDwRv|_&u_HC>r zA&>Hi+j;}qTHs6{l1-%bn;2wf$z!{)yKOq*9<{sph-NvjpaC1M3;)v#bYOE5{rdD6 zIek(TL}uwS4=q%<%AsoOl+^dkf!X4)_H#WEIlh?&#blLdx2z^AUy3<*feT|YQiB+k z94A4;14Jc6j-1d;CXQo;?8UOZMARsV`t_ZsTX-cB;&z{|m9Y@c13^M24z2Gm6~|q< zY205CRl4QfRuXM4L#zxsI4zreqR{V2Y^7x3p2Q$M7512Taz)_Z5A!1@3lt3aK>w zLb}d1a!XTxk}g_F+ULs>$Czqjm#u&@k;k|VZTlHHBT+0pHI76~>St8FG>0DfBxtQog@pA<+N&EyD(kG7=$oOW%SXu7&E~nPcXh%UL>!ZXjas5u$Vsm zeTP2W0}Nxgi!Pl0liExduBX0}k@R~>WgSPcxKWWUQ2Ymns{g|F89DIs$4pi}ZzOBb zKZ7k@F;3`-PlNdnCE12QW7*FzgY|WkW2+5Ev!~uGRnB_V2LpFTumvKTbh?ecK{PQtu`RT{U@ROXhY$_LjW0!23&u903 z$Eo-Gq5k#^)>_Pv9op@L!M{27{KJ{->0f{0so+}WQHOP`n#PXP5wQ`~l4czjj$&`L zFX111IrF2(EgnXwn?sZBr>>hwuo z>m{N!^ev~?n%=vkvhb#^c30BfaKKkRGMDZLxMC4xmLqZq-oqIYO#0}4JsJR zDBLojADelaa5$Uj+nymo)o&^e4=2%m?Z>#l-2do+MG@z?tCtg5Kb{<(_k+sB9H#Gu z{Oa471X|8jQ<1ZendQDoRAyKTRdW={uFEn^cV;|!J*AZ16&yFs-}=dunPIejPA>KT zHOM8rmx4V7ab)eZwgiRPWG{<8`>8 zx4)THXJ2!E0TS?hFTnhxT40dd%FKD-0FQQWByBg35Lxp;=y%jYU$1bI@;He+at$Jn zXStFpSC(j`c%tXmaM(U-H0bRXcCVC;U{F*6msWTJH&+C|7N-fj9wUL5TmzW`^E3ZO zwsk`IXj-7V4#FBkVNXXdCob?64i_nt%BdkRQ>mJpntFkH4elZ%{Y4?wSzsw$+YD@? 
zJL9{yo!-xu1E~mCeB3*R)BR(KPp&>9XJRIT%LKvQ+850&HoeR2nO{wdv;Fx_rR74gR%g`JF0 z##_@Ks}@g36^ol>ZI2vQo=fLk?n$BY!K1M2%?WbgLmc_{^)|7%lutS{j)JIg9(EMC zZ_gG>LXD^gT=jTFd&1X1)>?mfT$e?6cuIqYxHIZ(c{QtKnk}Tv4#p z;pORtsPyg&#*E9sZ5g}qVb5tIf4v6ZC)~x1pI!n(?;YOL7)7m3v(f*{RIX1*cMd)`vo1BbyCnjJ%JDGL<=8F$al1b35 zFBm#rUX}4QJ<_cnw`jJjFcui|<^vNpUGICVyIt^@_ zO%A8DQuE?t)*PWk4+zy%Fe7nEmxz34H!WM9PXk?!(N}Y7E3(%0FnQ%-AQ3c?xY!*l ze>WgaQ+I_hYjXDqGnIw(;=vxqwCEgBAJ!B6v_5q0y-UPiU?KTR@2AxRtEh?g6Y|pT z5A$H!OVX+IfeA9ZO`V1F+(fX$4VuR*7T8A+n@U$}3(qEEBX*ot%1Oh3*9n59LN;3I zmyo3hBwMV;Qdlg4k!yusMBp9zWUY|L?(ZWu?aRo+ofV9h%TGo_(Us=CF(wpOA+c}!x>~PI+O1QGwH)0 zcNyO^dbIWTHAYL@j7tj3BpbX_tb?=-xbz@xQZVg2jXLn2&K)C%KZ3$&v$nt$mrJEn zmmi|7+xp3+wqDYe`hvXd{6d%WPw8wA1r)n11uMG4N#68h)THSrQF-}?R80$}Sx?RB zcC%<|vhfa$sGUN${CdfFj;rJbq(q_n=nY0>#1$^?dnMCZ*hVXT4sp$f;zCVf#pyj8 zM`t^Q(u|90^kQ%y%?n6p?i$vTpdX)@4_)u*1iPxIv>}QW+yb|EMLPl*J8v zwIK`l#~#L)S(ccq9E116FQV~{1DH50cvG8FFn6pz)^9$EDN-kK(Op?|@l`|ZQ?+RB zn}N^m!|2DEQW&-V1bp5)8V^jaqDj3@G|fE}TcVC*<`Nk!l|7D%b-nbxFq;^>n1VZ# z6|v~Fv+&-yhFO^w=uue+_19u)jJ&|^ES-Xt|9F&VD6T#-Om79|)4Tg`pqB8R;S|`NeSJTd~6hEqzB{I z_#FIKGMZKUJ%jygS^{@OszF%K#c7f@7^))7d6+X+-WM)YU71O+F5oO}*3$%Xb1@wH zz6hi%ywKzINqn+57?l$52s_=yc;e|rJe1>tgXs?V?S~dly_$?66OUqQkUqYzsls4- z3?ze9(ErS7yp~xF?G~dk?msJR+}}*=hgY#`qs?(uW&w6Bm8C72=dtywEEvR;&_UTJ z6*hZE!FWbMnx~sd%9f+tvBoE)SmiQ3=cNV1LeF2pP@3Mc{KRZJE`uuz zjNxEr8TU=xi%|}YV747mBySgsgNSx288bKsXE)_C36n0n1Q-X=J;9z*= zD8meGUB!tN3$;wH0jIHgG?S(w3*~A*$=C0vn2bal93Yy^y7O1KiE81bcau9FGuNhr zlOky31`)_qoq&&*+hg2-1?qo4O_xu0!)X->l*`#jJH+JinUy%<#(Gng(V{roxZanYDe@=Vj7s=JtzZ`;PuyVC7sNt^;admx61wSm+=F@-Ms z@I`RI9->i|zE*`j&&rp^UMHBo2sGRr(D}!liX!JWQe-@Zi?ytr^oJat!6Xv?6HQqs`FtXR)=xk9_9lx3&0dtu$Wj>7mov3hr;45^)p0Sp{29(JOPC zxGzz2DD6_m-U=h@qQ0l}-QEl)wOX5Y_*+qpVJCW^+Ksc$3L|D4%jv!k(@Bia0AU7F zNWqc+XA)(shxL@eM*S?a_ls0{yk-p1QdYxAx*2aio=Ieuy(Nn`jt7mz4(@5}TdK2Y z35NDGGtZJHk)*d8bfj?`<9lE###X3Nr2#QWHb^CGphiVuiVJq`TSh#Uc2W1b6}0Ka zdgk@ixpYuMicXJ;qI#98Bzw#w@cUEwgWPcBnqMi#Ui4%-j?g`?#2@qzo9E&bJ z0`W~*pcYU9w?=iqjQXo!W#Nx0Yh$64^Zb|a 
zVfh|->G}X7B?}?1d<>YEEC3Q444Na32;4YBn$5(3{Ox4;dF}x&wNT*dk95Godxbzp zdV}nv`B0v)9VfkU6IiUBAQrZTF}>FTKkht%f18A!vgB6yWm^yRd#=#n(}&^UJmH)2 z^D-O}7!jV0vD}_B!gC{g7p}UUgk1SuprZH~#9}6pf7)GWxU&g0&+oz{*9K6SJ`)ef zJ5o!>9fJF`7L@DvW3*%~h_s!Zqffvrxq6Ve?G8= zRx{nh=ZX2w8hSwK7G3!2A3eD86P3x!CGUm>*Rkr)vLN%a3aznQiB0oHMnZcm(VBbI zTBBTs?BDT&2F-bPX<$5ELl?ey!?f3!LL4>_ZOThs)usv?cdz6c^qb3lK>NZ6q70WUr;fMvRsxOiDCSl?a*@18``f1P&F z|8Whjyrl~>YBTA-yW(g#DjmM$tO3R%7lMx&fX4G+bU9lBQ#`alI&V8&AD@N={5Ysv z{Es|bcnf#E?q>?;@{r>^3TBpBf~$g);HbMsWnZb%M?QW~t!0Mo>h@4*Z!OFfj6lk9 zH~f1&1KMqOp#Qo^SmAVsTPsos8BNi!%~+epn_U+eXsJM*H2`yYc$snr?q*2hD-Ri} zDa=KT-ld`gg&$&6I(IgPI2;8n)0?1V zzZT54J|H{gEb-FHbs)U#A!FAa0L@gWSc7oy?QW*+BEpM<6`=Hf3jKNMCY?F>j*DAc z%UM2~4yj7{B+F(Qvplq$erT70?3;jzCq=RCdnlw6RU$H^MgGY|(ToQrq&0FA*(WKS z@~s4(I_pHrM~k4%x;E?o_cBYrc#(8Pp?9CNj}tM7=Dczq zlI8`1e|mHbUH-6|Ox&YGyC&w*(A1M;?*3%b*dc;(j#^B~X`YCU8BZ-PW{~J&6GDD< zQNstK=r!&&ox1unEnR+(QS+073Piyh-gjiU|jpdX@a3J^NooW=BFbWx1El3=^|AwagG9(#6IVA4R6w+DLc95 zPwhmt?=|ygLoglrBcI&eD+zmq9TgR;G-@)+fs0y|EaZ19s7OI4Wv-agX`5dWnMKpc zwCf@C;Rz}5vHwK`j1A`#A%Y@*IJ&D*mm{@7w5V@9!(p%@KgDxBD(y4f}>}%;LI6Xr^rj|X!2XYkiY7!&sXm==G_hERR;1h zyzY`8sNI>(uiw^z4$nvMt7{GTWxLIZvaKOr>7UN;-mlCL{~XQRiTx9tIGSv#tscKA zX%erl%Ch@kMX(L!5`2f-c)se|9Ja_`mQjmV;^9;mhX2y%YgSF+WenExks=EGjn~uo zAFfka|JZNBxwoG${;R`hH0kntbNle}DM@xk<5*r_*b^RYHk!9MBgvPoT7X(g68v1$ z%V_rzqMW>5D9Je61Y}d-DA;tlN`v ztw(W(wg)vjwuQPFHKEyrKH4--AJ2!ikaA@cy8C`NL^mklIrFnLzUBdJe|7}5KMH(% z!+EGb!3;-zT>#%Utf!h&_EYQAL2O0Q0w_9nhqL>z3#aY>NyaLTqbm<8pil5_Z2Yno zRYZ5y)Sq9EoqqiP)K5LC;(5!S;#8T$7I`xP_>rzvWWmr7wyR z#_E;zIbpJ>Xm$K$ox{h}$tKIc@Rt8Y*fCYpU(T8c?i zp5mUz3@fEY$)HgxnYOb7i(`a5o3=gt8FQYSS8<8D=#)xg>kDFNlkgyMR*SU+Bofb?mWK zI;ec%iM5tQr7%N##_8-l%HErEAI1fJ0wzgtK7O=fbGEL4b5_fs=EzN0zPSzPuq!*e zG6yaWpM+y$o`6nI2Fy;r59=J<`G%gCVDV)ojO`=r4$D07+%Im!skg#jpS1uHQCz#v zLTCz!gLU=7{^f*d@UkDlcQ-GBl*SflpAbfk>$Au)?Ns=g+X;4Qv6xccNUT=fgCzU= zu%3Gig;uWxE}}LoQh1*2Hhc%;K2V$>)F`)>yhB=X6(SdG2b-ITFeL57M*MpVB2)b< z6b8>hzRh09Ptl>tKjUG=y)-CQ>V}`)F))=00X;iA-uU5fR_cmyd(CLWDm@2iUbmvs 
z)U^Yf6W7DJdOKdsX$8)ie;+@6ZiPvXQ`qCd%W$3hLKuk3f#rYJ!A=*JZE$c06WL;z zKT%*zSYLt(weLY@v=5w!Udva-hQq7R+W?P^{lok%8K&39?hxwMlQiS) zUThZ`#aHB&&}Y&kFtBA0*ZX)UwlTstPV*IAcws#nOb)~&Rg&=j=SSMrVNTU;M^Src z2NGB}j=VlM9lso&g0e+DwD`+ksx+(uWAwXe{P)jPMd}o~*lvYMDo3bKT|CZD%O@%2 zCS+N=87kdAhAN%Iv_k1CEwuiFY~5Ul?p%N!WfQ@Ddk{uOOonHlw2|2!f{rhCq06Kj zILUt_N^TItkBl-t+GT`?-<(73>;)*QD1jl{Tj=EILYj5>3(fd;kUJb{gR>THLefz} z&V`HNzYG~-nUscKJQ_jl^B@i_Y$6ZtF2?li34C|BC5|ukhGffaICAH4wAi^5?UqgB z2PfTw{3Z{4*%OA*o`jP;n?&y?BWE3b885nr;nxensA_(nzS?KV`YEYlZb~p>!EtmM zIL6z^thXLKJ&i9iHo+4b0RHb!F}oYrQJQwVd``)Hw0|~0w-pO)lAGU|4Vzcc(W|%M znU5;O{Av{4|8FB0)UTtXE|^mX*<5P!^*xR~Z>8}@dVvUIEkppxcIKs?$MV$Uw6b7>2S{JC! z#NW;4=#eoWly2M=m`1I1gQYL^UG9nNW`2dZ{!O^Rb1hCE7=<;IqgQmf3O_R~&}sD~ zf3C#CSg+oSqpJ;wc1{k>aJmh$_muFT;}$qlF&?so{YVcxNl+2$xq{yxxcI^h8X8l< zEf%;PlOOp(ZG$FsZi}R2i%l{5* zAvk|NX`Zl!`o%7RO^ShZUBDtR_-TyuuXNJEnhY{C;3-{Ye1_3#C?k1ZvP8LPGHuZMDm&9h?b^d;g@^kJCk+<(=2!@?x8 zsa_8K6TCT-f$`Wrn7(Qb897M|Z6r-N zaq^574?m*0m13B8zm>6Th@-6zf=8etj@VECMJ*rwqxOz#NJ{ZLqHH7!wl@w_x1K9x z!2UcfzD76$Lc9zoYcfS zBr|gsoef!}#a-~C9S&YAbdM`GVinG2s>Ij*|o> zqeWmGDvevl^pHCH$MpACS@?I^6ptSiLG}6LQ0>Gp-F0*xcXx>!{bsIAHRackH|{6s zmyHBhKFa5^!&W2P-(YR>R+E&3yOypTzDR?V|1k%*3S7KKi8ST&JUBw!hIJ=k9j$3Cm{#b^^>UH!ZfZc-x1`Nk7Em*BdA!Q zFLXbeS#izG4fNknhW2~oDle~0C&$)kL-)i2^7i5^E`72&dw--mQxoifI`-T7+3Q!9 zKV>ao!(vLb556WT`%d#urM3c!@V?cBN3*zo_l5BNQWQv74 z%2~3Gxtk&fZ{F``#l6dU)dNAW@RbqFO*MqGm1~Il&3&+Tga#bzk_C|+zj(`EZq0s*tnwD47slKnI_uPzfs~Hu78}xh?I*$jt5Me~N^YuOTJ) zPfUWbxvWN>*VKah=Tw^Sm_|NW$$@R6I{j5?D=<)BQS;tVYt~zgz4&fAZjMRh&U6{W zhBqZd)~bP|%n=3Nw2_Y4m_^6lm`*!e1ZKCwI==gx@IDGN0(tc@V4qpWzb+3ZQhV#E z#u;-ed-@?CKY0Sv6_w33s4ukse%_DTmzwccPsfm5JLI_=Zu>YXZ7H(3Pnb1~5rVP_ zL3}*@kJN_R%+3r_gHFd-T%{X?rRt{e&({@eOtXlEiU)6F@QA-Et4F59U*nEfOK{#E z8RSQ@4AuYd1%!pTn#MuLwEP|nd9K7cX5PRQ zy6}(1P61xaf-Tz3khH7~_DWq5cofrUNY)17KFEX;=Tc~X{|QQDGvNBOB1ktp4}OlH z!A~?4KNfVr8JjamU+qAhuPLzTRtTNBWErlwVZxtUlZ+m}wxN|oRe5ei0;t)%gMIN$ 
z^zo&8@L%l*u(YfO<@`kEadJEx7I*`0$zO)UrV{O7{yW=JMRyF{+Gc>@;FA<~aE`Ck%cz){~>!*NLmG5=q&;5nj6~RD^6(X6wCn zkjCGIWcrhSY!c>lMe^n7XxBw_4xXm70|-{VPUMQ7Zy>Ap%oaG8V_^8zW}J0?H+j^) z5Jr|q^L?%fuqEg?NtV{8E8O#`w%#);`n!UJJzWPHi;PHh-U2vRlg5XKMA0JWg`B;w zHM4Kv6p0XD2IFLB!dH zPn_~}JEm@lCr2;lkvE5(q3P*92sLt#B2_+c(n%>+hn(fgP~;NFcQB6~*~=3D)_M zi_zF66x2Pg!-S#j;H|!vZ|^rWssE zZ3oe<+R!^v7sM~Wfj|Q}SR=k1@^+WPQ}wNI+5ZPo?%qzHeNcs`X4@gn$CzKaP!;x+ zKPKlwmcX*UL-3=?6Xt2^!lK^KFzV9-xE51SZ@v{gHtTe8M7SeN`sNRGYZJ<){N#*3 zN1^4U-FWtiBh+^traK<~By|`2iK;PynmkFi@=Q2{MxTIt-Z$Ywxid8U$b#433t{r_ z+2qM*12Ehk25Fvd81b(k5?ZBE`%M_17 z0BzUc=i1%yE?OSbq!QtUwj94E#}U-#r;<9kHJ~r-)x_J6gO=}|Gr7wk z49*SMCC0F+japANr}{kR8Z@uvUW8OLtQ$J=2m}v3TS$VfRyy zUw%CU#E+}9|EV?7x3Mm(8(4~ifo0??Zw!sbL11vcmsEax0#^j*`L<6Q;4{|} zF2)!^c&ZN(&ueAUX+E4;7XqmxBjNNw9!A_YBi5lZ?DrFPjA^UDy-s+6DWCRP5B2UM z-cEZVYQH}}(m0UOpP~YX`aG#>R_JUQLw#C#Sc6(ux8bWnEk;?W33D`Dp<2C>+v)q0 z7#bgE4$n;^*URn_>N~1@QQ3HA^7uEn(RdO_uWKivGe)vw#kEPJz~xOHlgK@_evSTX z^0|Ot(L`nUTavl!MupvcYna|_iPu-zQddh8Zb6_oX;!~MHpN)N_Y5nT`soR$5r-A_{awi=WOdQ7e+ogjZaeQ1DQGO4*b z2^;1;M%$D#L}Hy0_gtrt_z8)z8_8?o@=Xy|xpNI;dSE<_&rZeKg=1k}_zik3Uya#j ze1o$th$1o%qp7h)BG{VoOhNTVV)Z+W>gH%d)XEKP?!BWB7*GYSgCS7j|A@nUEr|1~ zg!1qf(AgIYFGsPotmY;tnAd=hY!cjDbr%#%g5axP9ux?c7r{ z{>8PBdsCcNW*hOPKdysQ+;-T!fxwG~XuzX$*$3yJf|7|fl!@NNCJ{-J*qRAxn|4CF zpATw9U+0`o-Gmf@k>#}nz`dsqjKWk{H@ElTFu;Ikeg?k0eH&75A7hH!Bfu++gU-F~ z;4JX14yCS#>Kp&aMZwK6XKo5ap$#8wqy^(5kHXujwIFA63wI<$tF`i|=`yW5a>u?F1N7(}uZI z8hIt%+9~(aH!ehMZDmnPufgb)= zMRKm5C8Jjfee-nl3eQ7}Dtw)G;_iHNenoi^pEg>IU1@l_!X+;RDqg;W-rY)2b>|eb z5|`6?GS;9~ziM{Jh{*DZIuAgkFp-2Rt`K?-dl{p}BZ#$E3pw}jC(IpVK;BMpp&>DO z_-s=wEb^O49GelR1hQo1t9;fW{v_#L^dH7OizFojBdLwIJ~_Pa2bo#)7)-;LP_BD` zSij>5^Ccb(j8*W$s4tB7vJu>=a#J#Ivkr}ny~a764Wi2v-;m^ZCHiRDN&3jfo!`3k z2}zUNL<0lv@-trf(2D`nnG5Q{*V`8Q(G!47LL12!o7DechtLy&rC6V;UZ03 zHYreVsYLYuX36fqsm;9FsUz&=ClZD6?sU4RE_rEmpT_qG5rF)C0E|EgVMn&{5~RxzEx2sZ64at_g54iSf$(@r~+ zC)3h07!v){hj!SUCD|3yu55;wt9|1#1G#zq%$<6G^?igVJmVzUy=T`-oP 
zbN(~AVkYEDUW5>{-ixqiV}iiq}$I8YKin6`~Eko7wU4w4?SFdjWY*tYrX$NYEdAlbQ<#73Qgy=)XUy_^AG(oRxL<`rL&AVv zJSG!o-VTGl5P{)PkxTaWEQ0md+R2fsM${R{6U&d|!9RE}iP<<06Zsh=PEMcI%bX$P zzIFMxBhQe_Memu_cPD_!=rY0KDUXJp=V9oz9-iAiMCW8Zq^t5=(e&IKpshMs`q_(q z@p(ZPo)1OMUz_>sNy#WZWdvS5tAHZkrlHQ9cyw3I2gL`fbfb|xF3geTb1XOE&&R*9 z?zA%=n=G(smS~amRnjOfeiGl)Se)hkkgC5H+(a2CaFntm_OG7G&hyh`mDsnmV}BCL z1T#oCy7kwP|g72;S

9}7NmN#I%0%U{fvRdqTtD z;gws|d%zbev*zG}a7|YIs~Qw{3Y_caIB>0s2C2emK5ADE$c^=YHl2z539o}}<2)O( zWUM5}eI1M02AbrcMi?|ZNJHiWfH{|sgNf!G5L5C6%dfAi{V@kH-AbodfRBp>&3t2MXLKMKik zYhWk%cV8u6CRp<76D3&}>AjG=G*{s4?jQrv1;lpO7ARK8g$bil$e*smu*P-3+Gx4~ zZ~NjbRX_cc*S0l)sXsPgwYWZ=@_9Q(+;xF5PSLPwzc!6~dlmvNAE1iyE!e7BfR#I+ z;PxYhIDPmXde&XV`%_P%toUhsM8>kd7u!&k?MAWlf6!Hb824SzLdP8rtj!y7exs}m zf8u94K5`e~QzxI~z3#^%dsG1ps*mFZw|dMhyo?9_E5~ga4{)s32E4Q6C^`wx*HI5U z-03y|E#;NiF}Dr#5*md(z+GDZbO5hU>qVzK?=i1dl5YzwhPGN;wkG>1inSHMm2e@*s(AdI@H?HOrZ_Z0w&U<^X|dC7vov)Np0|B(owkj ztdO2we+E~4Nx;pT2{?69F2+}i@cJF+@peN2eiX?;6{qu9@aF|?AMr!T*WW?%vl3U+ zebD+~h}@k&fz8Vs&xZH+;K6ifcx7Ho)zS8jwvDeI$7QyS8lekO{tm zyMp2jUq zqv1OoFeT?YNAmko5VgZhu^{kRM_`9kDoyt22A^BAS*c@hu*_i=%ov_Wyq&9rGjBCi zSO$aj2|eMtT|u=zNO1jAo4ASU(L{V}o4^6q=iml6Qe&1RmU4qx$Ad504t^$Wl zhi&Yh4mu%!NzaFQ@FDFw5kI{dPBgf{zH1)vH0V4T+OieY1`}bHl?t>3DnN?gTVi)X z1_BdQSeuV~;77v^GXD5mUiHdHvaRtJRKzF%bK?O~vaiI9PktnTQGw5PlfZV#NAfXH zU|cRW1&6BDWc`^@vrdc_=S|v#-~D(#csMM_E|qE;k!1%hb0cAEkTN6>+rWR4#gOD- zh!d~*Lf*DR*wr0Ktd=E%`3+U@y%!5t7V5+C>%TxMYNpUz9wu6z^T6kE1gt#xmYS>* zg9U!MCPPRGplP(0<}%8EAUQ^$3Ou_w}kS|#14A-0L^s6Y7vV?+g~>+PgL z=EbzfJRX0Wgz+ZAS#9=Lz^R`O@>>>(q4muT{3xS!m_Fq#vqpUsJ8o$iRn|(P(u>DdY1?pw>w*~EL4Lv8E)Z<3lFhvn$7f) zq$)o!c!m-GS4I5{?VwwCE_^#6)RVKUVCA?zxUr&y=#@7A46*^>i0*&nAi76$JjE)Iw%ILbiOlXw*RB( zyyLn0zc@}-HmQu#LR6x%x$kpTl*)?8O3~1iv`~>vR+*7XkrEnY#C@Nuh0q{MODTQZ zlm_kJ{r$@yp9i1M=Y8MjoY#3iZ<6dl71B`S4G|$;r1y3JjDJVario*=$nOO8D&vL} zLqp!yVJ`bzte!pocs5in=|kTeUF5imwlFJ=1=X#Ayxg8=bS}ihu5CByv(w2aIU@+a zuA7KgFX^+JNFnOo@kdneqIX*iaglxovv%4JZ1`M^zivIFp$}eB`YIADtu&xhCL0w! 
zXCf_W5ar$O_}uEu7}`$Y(GOzXQ7&D3%PD_ z6uU)JAgw#SskCl z*9*PAq+i!*w{;e6dSilJLC&;6$fn5*72~=+x!7O-k1kad7_?V}yZxGHBR$UnyARdUxQ6#+;8r1>cKtQC zm*|@|lzAbxH-vlOGxX zawOl_6GLu}w4_O2n@QKHbh`W71ZtFSFFd>7kYNv=DAIIdUDZrf!+ubM!&E=+lFJ_PjZqeBf04X2K=^(BVl}7HlO`*8|jJC=NBnVC&l~y zN$$zfTAoQoT)TEMn2fPSxt%+#Y!8VyJlOXIQYZJ*l#AzR)|6DZuv`SUs|H}vld1Lo zdBTkU^%GhlItHedX_B8?r{jj6YjD@dkcK>KA^2w&7HX}*#|yUMimfS_*r~;p`6bi8 zPgbLOLm~Y#w*`l-duc#SEMw(63McnWz`|cvG_&9}?6RxHubWrHA)g9ZlXIG`{XCLB zTeJ@o$BD4U^Ie$(FDl^jjzwhbpd(R^dIg!Ug`Kh10`AbAOY}{&IR?l0n+dTm4UF+ zcI3;I3y|zH2K5U%!BpU)uWI;3yPBWkcIRx^<|PMzHFCkq_#$R$Btn6=@Z6sg4Qu{8 zL$r@U7s|ISasaWph0YRDz*n`;?kQ1ap z{@fqHF;9zNOy_lh^D+)#p%`30JcqnjTn!!rN65hiXZZN9b`ZOF9&8)A2m<=_G33I0 z@=SPbKKLkH$*3SAjtTJSTPmtn{WQ`sH~fx zv2GJw?-m!_DKU_B@f?)=7TA|z49J+fz=nPgsJ^ZaL+R7NV~H~=<0|$;P8D@p8$+!+ zcY=E!@G3T=*@?4f@Rmt$dzzF&9YN(sWjQ zh7mO9m_hMi4rpi}g_q*S@Z`m5c(3#swKFHfv|mYZH_u$)n#=R|W6f!HNhoVla~O0l z35pc$KKelK2s_5f5B7*$;(cE|Cj;|z;k<=A=qaBQcBXPjq}U{!`7#OZzt(fFw;du1 z4(sUbPd}+yL84XD`fSQtvpDHY4^7yRXEk}jC|oQuf-RAmf~{}bxuzqJ;dQ_)I^~`Y z`|;U9GFQKjO;ncv<4r1 zO}hlne}H)5qzNL8TAO z8YL5Y@F(y`0!slbx6+UGpJ}{0gHMwssq1?SoIl5byRtl(XI@U<|+CO^ogzeb&lvNe|7Lahoo`T8~A$BXFt3H+tExmH%TF zAP_qC z?~gFw;x%3kw!n{@W8ips9NNy-USzAF3p-DfK2E{*Z>hWO8Y4CPjeG`PNfPxfZ^ z(t&HG+@sqJRqeY@E4#MR67F}cPs^8b!EMEzBzuB^J84D1(R|6jcLuu zw?K}_GHxpfZmb^P&{8p$T{!*#*_S%R7fRiu>vnGhxmg0M@W>4Qu6#B<_4WvL{?o|F zEK|hVqDqoCbE}p5c!6WCs0mSVZv2H6dStt2nZS8mLtp5=Cfi;<0+ktNpG_I^4 z`LLV@{2IeMooyxgYHO+e>IiCZ^(|+ST1v&<3cDf>er$`<2tK*|8`m0jl0^K~WKF`& z`J+k;(J*y4t8~v7+TAy>=Do*p-;ZSUQ&~nnA2cBue($X|%LUMhjqz;85nylsEw4BE zCkKto)2VIkZ&FrK#eCD4z^;FIoqk-JMC%71^F}#g@cl#ybSOT+%EW#v7m~`qGi)Tw z6P~cSD+cMo78yQ-Tw#1qEu}|{7%Z(CkGK2&()@uW?Dd<^%C1VKe%6VYXHh{PXMCrI z&2|kBm$3NfDB7W@xoZ;ES8-G?c8p#l1-P!_!h(5b~~twszX^yW~6R`oYZ& z8B52q(gzH1Zl^6BT&Ia<{Rc5fVKhJMswgg~5qf#vhOFVmy+owxFa=jVw0N38?h09c z&D=2BvPI|xnETLyH<9RQW&s~=&2Fe&_LX)PS@7{io3XNR4R2TFf}d7Drx8M5OMcXO zzR^+@1NjE>=944JX?S2nPza7X{+*`fgrL5OA%5%;{-(?^+-LjWoW+X<7RJ7W^4=&! 
zdq3P1y`9~D4e_i+I&Zzal~2sg!#{<-=yn{Squ8v$Fg%?|NAAb_hm$e1^)&bDVlDst zR}t&=MI3+SeV}^{EU5eseT*=gjH%JZ?5M(a>iX_G4f?zlF7?e|MK2pSz&PWE3r^?h z3p8MZHkL55{X6-M`_6Ii4PwB~t&(|KaEEq>+wcxo6UYLoT&5w)h5T{&%#TqnXQVes zfwR*l_+oV?@>UnVZ5quR?A}h) zbB?jeXIe-|$Sr>Ks?)q;Ni?~YcAdB`*hjcoV(=kfp1pQc4n*g+kpA8?c!d#R?_Kc) z_iasN^|TYvXnKM;Ojt=WW_=;g3**UgftM4--r=+F>Na#&E0H~O?O3H~O(?r=z{|UD z z*kF-h)?;lft3GKd`>kynS)V_j*rh!O-{m74V$HnS__|?YNKN=-20DCGh8&w^hmf)< zm-uJ0%*t*vGVsudulweT`TL`|y)Sggy@wK{v~2=i*gOd~opyn+bJJn&?nlI`PoBQF z`NsF2nPaW-REmG*xftX}`9V@zDlO}=gWI};%mknF{3FRXbjH)~$ z3(Kp>?&dW6zA}#uR74G+_6|Lo!$_l5l*C*R*|OWv2=(&B#mCanBH)N$~@al z=4ED5`$ePj}C~QfQNB4FuWw+YWV0x zP&S%Cr2N0~vJQfS?zaqHl2U=o@4hmkPY==OmLJH>2{VbIUkCA6k+52kzj1>pKURxUGS2r`+m8+;)m!dF-b)Yj33w3kII0%1n zi7GjTL376eIAhvG&DWXG#_g^ocfK*6ltKQIT|PC|b|>rB)G^aF2WTyGmT@*`VA+&3 z^6}n1;x}?O{HWAtFV5M8PQuf3v%zid$)X*sw!#QJG5;0nOTVXy2b>!urhlTzAMTS$ zhod0ncmgi>T!j9wTIsqwq7Ct3&D8En67Cf8_rtA@_%+L%1-}%``D2dLH4kI3+ER3{ z&%uK`g6LHDDEfB90h;k8iC0oyh@bB-hB=~uH}}XBw}}Vg?YmW2Ri}-nPTTQ)&nLQ8 z-$!65w^I z4V&fQjJg#|;n1TTbclR}fs>ZuFAYll`ke68t=W8EYAt=zaFKjhlg8#x_XQ_Y9q6xf z!k15VaB09&lsjETRf^mM7Md2^DLlsqIwnxlr`H93%pCk?=R(}&ucPOgNNz#u7wleX z59hrl_~PObyxq5P?8rYJQ1fXMvdPDA)}sW}@fDcbTn&Z|D+n`yskmwGU8*5Jg<9GE zp~stdBfJVlmwk_DmUaaW-C4j_-g5)(;XYV1Z2|7@k7WB>)|1hy$DqUiFKtlG#pgZ` zIOQ3c_`M>Ve=hjA_pR{5^-8YTq|gcM^Obapt^~#>E8vEDZTxN8gVIYfaQc@*yjh-y z@&W_xP1p$R@J?cW$4^C3@pPKx&BEmKQYgFR8I3dvM%nryZ0>7^$mp9i_~LP_J=jgn zlHSn1D@SSR>t&d=ND3u?`r))c)6l*7Fh;FS!vC(>!t?$1)PB7Io{)FKvBR=xZAmHI z>%uwPHWSNrVt8v>o8SPo!Cmcsth9B7A5c^A#Tn~vEPQ`lV*7Wh_A znpOQ8#aL^+K-Y=C@!{sHvvK#nk z_y7)?p2WQg&HU!2*{I(XhIZlh>{*9n{1~S?Ot;X*QB$sgqkI-A$vcAXGf&~*>nZ43 zlZwf|&hwKUEs6HV5vZL10p{L!Mv=|u;MFI@4M(!@$UH6lceIHAH)awhdp{);BV+N* zfqIlle*;rnQn0XK4_=y6fDyr~Fg9NsCw1zwS|1Xqk?&Dh_3jz?EG@_WNy<>Y=Lskz zdO<~jkhK^RVZ-aR8hrl<9CiCDen#hKNa&2kIpb=O-JFP>&R_Y^XR=m$v2^5>PfW2mW z2?O_g(z~B~nO5Hnd^()TwnW>5RU!itmYQJsqY1>sRbZ#HHP~FaLgM4||L;d-n1zKz z)JGY{Z+7Q8r53|FYY|9Xx|0s%tHF%m2TV<@6YTW2B+FYOSOw{F;XEA%pOu^OZKy8k 
z9K#Xm%$e{p&lxt;{cv~6L!#TBPVSYNG`MNBQL}4X;LedLkhy3-z4YJ*>3-@Af95X- zw}?TZFDV8SFYTvs%WWDQZZ8q`sMau-eHVjeiae}c=?_&qZj=6d6NOo24oT(P--VyKJA1Jk|u9Pu7=dBli=8@X^_DE5b~jusJ)sx$jp3> zM-!BIQ|mVJ?UW1~I<|_2ZEAq%*ZP4p`C7=8@};HQ|B{b4wW0d`XRt0fL_Opb!Kc6mZrv^tW&%7ZZ`Q{b{IBIoP;NWUehi8C+PR5_Ne(qhx6Pw8kH2sQ)!EN z7`|){nJeszyjV60Gydx1pD$1A50tOKPxnRHgBB6==_5xb==TNQ+S>!?e|N#PT5q^N z{;%1`J}1FfV;a4mq--_e$0E8^7MWkcMX>mZGA902V%BJHBpMHsQGVfZ#?B=Q!_F@w zmuh?|*vz0w+GdJGba2@g2PgZpqw!i(j4ZXO@lb} z7@j4!Xcm!aJOXvU{5jQ~esH<<47vZUft=|rhIbG2KyriNM}3$9_d`WN%TF3#mBhnK zivXhk`y^}<2by>EFZb)gTQajZ7F;5Vpj2S&#APM`7j>K@%(8&uyEb5}eGpVvS-{Tu zC7`qE19cSkwM!pPg~;v6kiA<3_G~RD6F#NDQ+qkMzWx)DmR*5qH=|)ZnBdHEJ8&9& zO+Iw2hs2wM#PZl$2;JC4KeWX|V`C9asB_@YOxJ?CYhOwC<5<|NCD$hDByBWgTW&6R+7l|0UqydWMc$Y*nCO2+QLDvdN}Mm?hU_teRy$Ma~54k zv+iZ#Fe^R{M#fy?V-~nl^9SeA@$yNsTS=MMj*^2&r(#G+T*=5^vS8K^&wz`m6Y1M* zJ@#=|7G2z1PP*1zp?0_Q=mWUKasK7pWA}A*!saZxw&xmUE);XwQr!@iS4(etnJ_Vy z;>Q^Y~ zKPh;d+)mPE|BjKiL&u5R)(moPrxO`3F%r6mw79No84NY~Ogn|XTIJb;wBolOsXV%c zj%qf5M1N5*Kbis8ylNqAzX3e|lt4P3TVmU{6sj6{i99KIL3iwVODsQo;^h)acGXN> zy!IxWUf9o)m(c~ZVvz)+xFi~re`L^(v+|&rs|u6eXu+ZW82Y=YkVt=GaQuea`o*yl zkh`doR#%9j@x&#>B;Ydtesu$NT(ccLjP4WPeVsV6>;YYHbS)%JzfDI?siwA()wIEX zH4|JynW@iSlPmG(;Z4^#;A-{Y?AjO*`4IyJX(72%IEOD0y-lL~1Q)oF56E>dBZYrgk;L?u)VS@p z#nF~p^7W4vK2O{M!~XqbMCMoGIBy{Y-S=gl?9laxl@RjeJfq zgsY|lCJh6`Zn_B66}FqV9; zB_JVWpm#3mVkT*Cf|b&Vbl(Y2P<=jvBnf=^uk(W-RsAm3C+60dHHM;*D5klgV1Ycr0)Ik4W0>gTmX+jNiG%Fj=_!epJwfo;xC>bNnL6b6Ew8OIE<#j(TRx z*XN+QcLZ2(hyhHsrY^yD=&fgtqw~XY;sFP^qw|~V`C3Ype9YlcnIC*z+5?y3dTF(1 zDZ;0ORWM5T7N~=o-SzU0eds-;0El8cF`;@D<{5ARfk_Rsi##oBf%86){$n9zTDU*mUsGM*(&PlYUwkDdVlp76{+|3B z>w-HDO0l~xS^?wW2rBy@k%JAIxG(hwGclovq`JgHfp`Wc-0vY%9~>s3H!6sKpa}H) z$fK6OGMsO)0@XNY`1V#Dv_JKd!;WsWLde+U?h)mM?Pc;+QusW0<&g$>C}fb!LBhR; z&bcB3*wI1McdM}lQ>GF%xeJ`Z%@U^S&PwLYTpMzzYzo@`UIybe=fQ}KL=w1Il?;qI zN0OG#h5HkJkez9&;BxQ*`KXx1$18u~wpR4O^r~Xy(uPQSOekwNW(#*N+>1w#t*~cp z1C`m}L)R}!V!|K)M0R%~OniERgf9<_z(gu!#O z8zk?P3k3e>0aMpW5jn5#a6T~-M%XTZ>f7Gz5&bM8l55Gou8x7}(|7X&&6nZSY(uzI 
z8v$)=juRc@A<}o#gWp(~Nh*H!lllLBhoSZonDr@ud~lgTBulF;qaw;Ex8XCH|2PM{ zZQn7mL+S9Zct88EB?qe)nL^T=gI0I^Qoy9)FS9Z}5zBmeCGM?`CK2N5H9)jSYH{^)@6SBQf8|K;O5=m(pepibQdu)op%jmX+I~7j2 zcU3I6@|pu$9?!4;xqUNU5c>5_1u76?HHoZ$^PR3<=>lW+j3s~HJ|~xT-O;orkv-LY zl^n7>3_)|U;hm2TIao6XygM`D{@zIN{1pvb+*%-f*qZe_Da#nT+i*8CjfH-VIXh{@ zWv*XKsi9tbFQk1>1p5?eaGrkvevEPC# zBiWd_{*1m|n80NJ4lB(}*y2cYHmpa7ow+=P4{^|EccJw9<&3s3*%- zh5g22Gg-V9HlD!kptW?pjZG}h+8S+sv_!S=44j!p@(tmAnz zHc`iv9lK;KH0p}5*7wv&`9v$W&&QJWe$c~vOgCl+X3k;9y&l0H26J}m*3a-E>ojWY zm222NXD*v}?J3^9YR*Pqac3Q#%QhG{edI&O&Y?#W22o=BRrdYOGthdem0xHgaLv>v zvLDCIVO{P`VwDFE@Kg7SuxriL*-r8n^arP~wHi9?)p~ih?e9H^{O==#{in@}=5ONn zxT#q`UcQ*u$~Z}*{Uq3}&+kyr`6Ix8Fb9KEXR%c_{R%uZ2pk<16wKCWe&H+^KsGkjr3#I9U?R91kv!c;z}ZZ6N#^Xa0jOd zBzOZkW|F|h_H1V+lYi7~(;!*&q?G@w{RH;}Mw6m9Gs%PECnV?JQL99;PZ(hQlZ?C? zPfYYIP?Ar?)CrwLQxEKsQ6o>b+sW7al$#;G{!A)Af zisaR?V4TSE(HU~IH&B}i7&VGfRExqx5ytSmF_WZjW{6nBGp;1*GxD1jz>ybE$ghPj z>+GV{VA`B6;+huBZS(zyTkBTv0Xsi)C+Si+J>nYEIa`u{{!bR=%=+N;f6DaRju0%^ zT8n`;sgTy)W;tshnN%!oBpVAN$lk&C^$qXpAbnUKJXYC5>uLv>5g15}#t3)CEFL;q zMnSHDI&tz6u#Lf^`R>(IS?>kjv}f%~V&459s?{zOG6^3p1_!vnY zEzC&a9}lua*AcJxZo~%;bD_JXMCh$K(M3~d8`_rEe94^U6wrevQaO z|B68n)l@?x4>K5&bjF)PHX&!-82qnM0*!s<MILg z6-A(!ID$Vvw2jM5xzGHqm<4S9lcyZg|T?P6x#TLlWXH|PqT$^4{dG3L~jF(@$R@!{HA z7|ajQyoaM`(5=O^;K(F2-cv&7j8Mf3LXBS+dcex5Xg&_g*D=Y4-pJV7f?>xA+C89- zqpbi>G>*m-qyNw`>Ej`-I)hG9GZehG8R(MS#Fcm_*1ClClhN^E zgZ4gbiQgSMWl0{C^Ol%&MIG*I9jCWDTw#IL2=tYd$A>zLF+2Pl`1=&0xZ75mlI)4j zj3f6a>^8IA^CunR&moR3rMd3!=$4`$@@#e=RtvqIuNIToG_h8?$uJ2PwA?0Q@E4zC zzNHJkw!(UTJ+VG84R43F!mh7bAeq?+4f*M?*fb7e_vgUme^qd(@*YgAu7Z;~i+Iz5 z&EVW{6HLe5gf{jcdH=!$Oizki8*SbKUSd7acT|$x7=8`gKpzyY)I-d@Ag<9dlYMnB z0zL>F)^`TFa9Z^wT-+>-@_G}YU0c{ex1PwJ-7|uhK2rv&-FHCD<^ry_Z6>3OuEG|V zad1Oh70%9`2clI!_-^$b{KLa%KzXeku1QY^-Sgcr{q6zyx8f1_M@)ekvWcwG*H@r3 zB2(bW6~ml&O%UFbhNt5zh1Wgl;8xei}QF)DI_0m3d6C122uW zY|L?YJZ2;f*PiOaJ_kT5d&Hm3*qx%A*73{fT2t{zwT8$zft!DDl|S`}}{@ z-NwU{t^_t_S36-*5k{@HMF8%mUby9W4G=>y8rzep$DGJ 
z1dA9_@nAhX)iMr~`a`Lr;(dDL$4zeYm)X#ImSB>s3_fk~U9>e34(0lkF;ilJfp^uet^#lo_Eo%W!bqIKITc+R9Sq2a#{@r<2tN;frMvUAIFWOS@VayWro% z;Q2UA>FcK+&V7^)+@M9-9he;;$4#4I%sE9*#G+6K`h0%`B?J4pgWAdX{KEryoEHbz zH?HBj>IiS}W(t=7R%e5)xW3yS*CyEEy&Dto(ilk~ zk+;F+zs;yVeJ8DW@QhwOsE<+`X3?IJm*MrH0E7u&xtf@uQ5&SbA$cL4pt$WQuNHtv8?#U7j(#AHXU*j&hfe-5=lZ&Wl+iG4{uiv@1?lKnXFc><1n&iB6z5Uvb7!td6YfR~+j z@CRada1)ILMxNO;$dA`T-9@YM8ai@4%xl`JbB2z&$D)GCTFlGK#}yUcywYtooRa2_ zGMV~d5B~Jj1#{HS3C8U%x2g0YWo+{h?iUljFzM=xvH1D~ViA`{|C~PpSAk!aD&$&2qS&RvK6XgpN6chJSnui{%;8O7Qch|-CgHNOpXd;320mhWW{C&QNLax~ zK7Rcm*>&(VR~B^FYFk_!cVHP04^`s`sdR)zBlp3b`gM@`JJ0I05bU~eRv8W#)RC-A zb>8$+7a6)B!yZW44CX`US(P6pFbCH`Ok*~2?U+IA6>UL0r4LqTI1p{c`CONaGiWTb zr((}7n327+;IHRY*jv4v>ZEp);x+GlEhhJ05;vL=gIkN;`H4R&n1(K6&TijWrrv*cedz0SlJ;>j{JH8V@NQCwvL~_3 zT(Y`;Q&0y@yQG2>)|6w3oj0_oDZ-7ZJ_Ia2!n8Z}@cz~*l6bHXF8bEc-~AE|?|hPI z-Doj*;h+JB2IYih#dBoeniOyzJqm^;CxK1!FbQH6$-qlv=u~|vxSZOE&R7>F_LC#Y z417SBm}wL3{P7_5Splw(K1Qr}9%HZV67sVDdP&NXOw#dTKXm^!A!qz6$)4@feB!AN z;(OSGv=lw3XMzRC=#0lC|Lk`%KC+0S^F?~0RFr+c@lE~H$6fTOxgyy$+mf9B9!ng> zF4DF`soZQ?72;+Sz>e%tC;3zQs8OyqCt51QUVY($GfW%E+511~+=z18CHaVqtD`ti z;S|9}b$Xw*$1{=p$TZ1E%+iV+k`WmS<_f*slHa??mVG%^0r|G{LP;WhuXmP@^wEay z${S=`%QCpy_k$KG+QX05cVw~uMBmL&gf(P6oIP`r zY_PN>)ulpSS-zKierQZ?%ol;Y?49^LE`?;qj)p6Q(u4p>JTFoNyB_)y-^u^cuX?he zGx;EY#g};~(16ls--@{g&Q`&L8lOj;<(PZ01J20@L+C zlq_6+oH+K%LdS|6UR*?m7M6?z`FrPh<;^;?k^-AG%-q>)i54RXY1h7y zn;TYIz1P3SuAd}n?Rr-N`lBcEPRoag$CUdJyrhnZr*)I<+Ir;TiHWFGZb*;++fUht z<}m8d7vdi-+VJL388wDtI`yhDD6K0YH;y+D3CX``FR&tJcPEiuWy^5&qG@pKT{32V zp2ja-Uwdp3I_62YkqUrFKY4F1;4_4NE7FhIbL+DN|Y}}bfMrJFpJ8!4b z(Mu)K+-)*cJ!?lPRs_XOHQ>{&Cb-$Y2@lOs!VbCvTeh9!-5&2mnX&6=&e9mHt_a5* zmlq@{dNfKb_ra*u+1M`@L#5_h(Z13Vc=X$Qx^d4iEeU;v2BBlH;+-S^Hgb~Sra8yD zxDLTWR+?_i1j;suV@;tV-pCeu=jOF^mEsHfxNAKuZh1vyO~ugGwuat(dj$6Dnp!=) zt3+>}TaA@Y()8Pp1pI9?gm2cFvO~vK;RX3xkTVdY2B9_lZ_{Xe@^L1PJ)=q&ru={b zBNf`Bx|1GamvS{rd$>1sZmj&H7~JY`ol30@!<+->sL_)Z_$6qNCa65Yc|osXjMWKn z?oz~%yeqsEE7LHs_ZE!nKg#!x;^EF6J=lIc3;r!rz{aR_D36?vF7g}Dp!EyYwhZ6{ 
z#;dZ|R?kJ%*%3Ia#gWc8+JOP87TEo!0}}3fpx&iSj9Gb)3mN$nB!0%S;RmjxwBnU_ea`rWi_+ZLmJ;*G_mwc@28USUg)NMfGEG%$Ctb<5|tiDA^!bvHd{#b|^>LiiGCmFQXY7*Md9u^!N22>~S7q$8)(Xe^EBE4vl z41zZTbxxSD9yX$^_usMT8aRox55A&Y%4TN$+XnLD&n|kl;~+WJRf$DC%h|XZ1611B z##G2$1`olD!6K4@3+;68JY!sY@Ev{Z=h#p&bthjbTS-5Bok)HbZK8W@&r<&@-b8Hi z1rj97(VLgQGm0W*<=IgX-7~|_X0XFQG;Fc*ATyp zpJpvO`q|dSoAK*830ROjK%4yB1+G&U)BQ}?0fG{eH|G>}ws&aAR?i{}+VyDgcTKM3 zb1EEDDyIr_$8+~r?5Cr%3{ix2AQx)3~y+A`MB-JlW;iW#z3Y z#ea!*MJwSB^>XYO+!DSH3Pz@LJ?f$Kp4v>bldh!27OJ@G)+IhmPn6BMD6m@>tf0AR zY4rLY5%$N<6Wlhx5!C3h6nPg=!EXuwObgt*X=wT>X5IR4^xTF)dVBwFDkJU1%dkhd$P+M)5z$S>*RjMCN{p~JFVQ?zz2Sw!=AE=q9HNU_|3C5 zXz!s)-YNS%UGhMO(w4JiPjMY74lv*FLLWvA%;HCfQY74D0R9`E_=3-lzv@kt6fZVN2ox8u_tsZ*uW@( zSNP_Zkf{%gg(r$f;r<#+xO^}NCgm*S{YM!>RkI*lv;^O?y9dmCxfM3& zL_vB)lshl4on+#nb<2(jG}@(%k?S54_-p(KLA8Bg(J! z6=&x!vxVw|GqBt&861)n@zOCpP&dzlg??+Hv*Z_y-A}+;#-0f_j{s?B4d97AA1&<9 z@2Z;!hks2V>tq;c3r&OBmeuugg$%oQvkJ_O8Rpu`VseS=$!`#U`9q4y*ghD2To=~%SQ%hs9E9J{X*8dIu3Fog&N2~12uOV5NQbp!}sLW zSJTOKL4`Wqo8Zlq$LQhxkoQdfTqFEgGZRGx|H<>&52-=UC(gLfg1W~Gv!#c&)Oe9K zoNV!e)jRYUE#nwSexZqDUYsP$8-j?k=zRPl^i5s|ti%g}%kbFGcjO9xt3GS~Gfrmt zS}gM^rmIhWXVe#&5*=NFITMXh;YmkFC{UkB zo*sYJ3F|&hfZ)zd;`m)0bc7xFg#v@p>Dx&3vf991Z5CzP=LBP_%wMa(;Zrm^R|5A1 zxT3VvuKG8U2T;m!nbpt53i!LHoD7|apczezsjOEG`P^JXpUtqs&*!hwS-+FuVo^I4 zFAm_+Ci;;BU)<{>zQ2c=;T@djn9=ZhogAI_C=ouqldv-P(14VzY|ebcOj718gU9OC zVQgGZKtA0_4p4YX3Hv2jU7!IUrzyWl;J9b`7tv_nF|RMgE2vFuuaYx zN}UI-w&a?@3Y%gs>xu2Co<30?kBCjVsIk9iG*8^ zfUL=*!AJi$$#7E!zc7wDBlCxN+&E1BT0bQxXO9APn+}*|k5sp)68yBI!Trf;a&glO za$HV?y;yP>8lS#`JmGVhuha_JD~j;(;8S#o41s?o@yub#t<2vae@U08GSu#_z^7xg z$(FW_q-=?Oy?_67nCg*Fj>ew@!v;IB>K+9N@uKX5H?lBuS0?@OQUvGj_5{0U(V*e< zjZuvI4JjAx>ekr`z6hl(QoFUB`#UX+gbZ{OUtyp8&@?rg9hHuEx^!XogEgdN<1V`3 zzW`#^{Q|~$j|Rj0?c~GF*F-%+hfG{-M#>J*(F!sBMLL*U8`Fq=Ld z7G`9{MD?RQ{azJ9UP2@Z(GDei+7w*7cr$PG>oD1pyMS}C4yRXtG*H(bAcbqz(0fNs zVK`KTA6Xy^Ww%aod!}g6qaTpm(%%P1{PIz@0Y9M<<&UqF;ZH zhTRCGe)~d58sAT5TT8I&1J+>IJOf8v(8|tBg)@ z@GuOk 
zCR&C#^Q925WatZ8_2F_zp>6@2CWQ}B^#TL;L6bJAOsfUoQjKwl`MdXD7kEv1N4DDp%54?f|QfMDYqI zSCcj2Dwr_p0eRToCpd-_p!}OCi3`{Y+0L%ee?=b}i>qi!mn*nSvrP4sV7xtN4}`s( z3K8krAXU7EYdaFnwdRe2H@`Hkj#XbIXD*x}^c})iYX`nvArO}B(1snKBZ;$H8tt{P zff^eFh)FdE@x!{bAVwK1MHHyNQUj;Ad<_w^n24Zpo4IIYMU4F?3+!JHm^XbL$VVLm zm-EXoIo6%pCCvh-^BN$3!Vlkny@Hor3}L~?VziZ$0Zo5l?sU)t7}?$Q*u!*UvdtJg z!%x;v5>+I>hc>`&=RAyj$S`xPqe(^JCoXrv2FCGd4rA{hg_(N-!0zZ9@*tAJ&hPOs z`|3BK=lWZtxZwm7BuwFX*?3q|eE@C*1%v*m?=eT@ zqYX`{aY2%=DT{%~sX_*FVK($DoAS9G`E1qNO#a-0F;p_sl{PBK817f-~0OrM0V}ZlRntwg!OrIRcE}n@LE^7I;vO@T4z|*IyG2QMTsb%qv5O zO(q_<+KY2u>;~_`A(F9an1r3HwURvM$=+RD0F44uCw*BSIj=DlrffE3@4B9_I(1pd z|6Dhtu5%mfBO`MeF{4&e;@{6CzPm~s0`1YPb`r$SumZ^t6|%jNS-bu~#!J`WyoJTIf8L&`0W z5Q)ICP-7QkIk;C1uF9s9jTMQU&(~&%$=4(z+Pk{=O1wQpBLDTB;eEw)aORVyJlK9ICXr82Jylc7~64Pt0Nzfs8T$0&cXlG&)k z7pk>Ee&f&jM{Oz)ZXiXjhE|c({ljp;HVZATU*fLC#WDtWYU*=NjACZW-C|bIUQ+Dq z4OjjUt6jvMnNu?!tgGgtvL*+LlbxXbc3JZcCZPvCsZ*C6IPLf9xa6H5g}0n(O}nLLdzexg2gXd{S2D>R%X(g4F#xhcxAB1^^hgvfZSfO8`=~#6r_~mI zDLB)b#K#s{?z#MV-UZ~+z3Q&rGzAl zdF+aI^0Vq38MqS3NuF-yCW|P-)?^#Jswxk5QN2vT#ZgeXpny&IP>t!{xkyxyjG3T7 zR7`fWhKhGc+Dm!X{eV1vU$c+4a>)Y2%|z%c9A!ht9mHQMrNpvp4x_x}CUe47$Y#$^ zBMH_xBosFmA5+b$Y6$SF3c2aJIFr3vg$Z#(*8t(Z(b(JQ@=pCsE({% zS4!xvU&P|`Lt=HLpJ-h>LXt~Uh|zU7BDQ~!@D+xTAT4S7Cg-`C5C|nBmL}}%ecfc5 zR2C@>=_GeY1IW9J93NJ^f<*V7Cxt)mlH!HY#Bs}ddheAslBilij6+SK&F>IX^S*;5 z933H(tupkh@dbG2WD@zZ`~xvr#FD(+B2wok2E7Yf*}GpKl8HhYn*ZYnc{vyi?{!K@ zNLn)yb((aL2I2=D2{7Kev9sVL0Mf%&_LQ=5r1@U{v5LJ;nBJ?hjcnWCJ z6COtpUABiB|MMC)ZNEVhYBQm8-e1u4j3VXgnJBWP5{4%`$eqR;c(?ug+S;&c*Ia5=eIOd=OfW!0Odw^rrP7 z=gr& z*=={0WeeX^Q>UAW(;gK#*L5MyHU`VJ z8-vwsUz&e2AN+o421^6pp(kd|#Na=EnDJi`$Ao=cfZh)j4)ZY(hbm9}F$5BIe?PikK&N8mimyu4D zG)OAY<|76bl;)dl#98zvWW+2q?^HHoJ_NTQlaLGO@~9jW`jE?a|LaEILQSyKL@}^J zJL$Tev(e^ESMcdwz;)K=)3=8PsrOC4DVb+uOof;u`Q&g6ou2&>J>1Hm_X5{YwImO0 zMmZOy0oOAhx{8#4i6FJ<`IKGT98wQ+=(xLIaFVDFjBWO%v^myEf%O8G%?shJlp>~LcTnPwPIBmH7uo8hi3P3j-X$Qo>2))RzP}hA1(Io7A3ltdu}+NfaFUD(c}?5`gB%4vbOdD 
z-a8dAcoG3=zx_c|sgO+DGmfMWxq+LT3%R#01onxYVfO|cMVF=A*fNd0$@ysGvI$#1X2OhE?<~|@@6^mh>VLO>{-v=S>S7Y$mhcbs>EUtsTqTTnKWFnaKEFZwJ0GIj z4RetEzhT7pSA^E_*N1s_XCc1ZmYZWEknxd$@W!>odR_>tx_m$FY7bc5(Fy|A9pfE6 zF3YouTTSG?J)%Xu+)(3;TeNtKl?7X$MhA}?61R}~C`M5e=5#uOVW9+l^-(&!YK#Dv z2xT~n_Z~H%? zPux4!yq(o}N-dnRi7_WFA?NUM<|-7*t{^LRE+S`qrI;plPa=?i1$!^ zXnk7%S*PDr8~$ZG(J5O;23+gdz#mmS5Bu%JNbe{(uCBu0Cbp96;j2i=v4L8p!6nSx zCK_ihQ9^Srx8W!IbjZo2AMmgFN+eBMjzs%!Ahm~^nK=Cg$8^# z`WE861>4yjf|0~uPlA{>@@-a+A1d}Ea&oak^Qb5!?W z*@tPw;GG*Dl)j3m*2jZx%r&h2EdvcJzhz#YB z<}KgQWg9y@u)GjX1=zraw;^POnLg+RY#|e&No>)9I9BUe88kPzk~LMWNIKe<9Dm1o z304=9$hZ5*d1@vR;Y}w?K$0F>RX}3BB}iDnC~h)v!eW8K+_k93-i)%xTlhoB!>@+y z=<-%FZfyiz){|7=6JxS6@gkczB0~LGR!VC23z5?42brMYM|k-CMzT8X4cV7>7fm)R zkiZSkaE-Sz?@4A5T9{mh=BP#D&>aLV&6PkKIX}h$M**z!;WJ{mj)3*D8N@cklFrM_ zL+&I4yQ#m2?_sStyW=}`{#FMiYkk2yNSA@iOIOg}yKrKkv%g6$U|cXR=UEGRD0cG)S{}GZ1r!r?TJRbG7$Qy0VFbp*uc!2tcQyi@)>W$ijNfVJFU~^2V`pLt`lczvFvkr zlgJa4@@pIS9bQ3Sj8CGO^B-TiV8-heWlnv!b6)Vt?Iks{4KkjOrCb>3tjE_3j1(?VIf5y#esa>m0aO%>=(O zMd;sGNsc@`h}Qf{h0N1hoKsc_Odq+xvGcZ&^K>3OjmUJlO61Pl zAILZAB`R8Qk@Ks|!^=5Y;Gp9OX>Yhq+Oa4~8qBpgju{NZONP7?A#ncwgnGa+czN92m4JKC>z>Kw~FlyP2Ru|>L_K%)KMx`H3#%e&>YcD7(G^GU_#=x)q0r~l| z65`rK*f#F{PhrMlHZCk2b_ z0a7}n(6a3p$H4;jPC3UjvdjUgzO#@JS`B+j2f*N-HOsqt0>&NtK*-OXEPmGs4cLJ` z+It5~Tr9|^mIGv+gA;^Fd4tsMA^5et3Px6B(Pv*2!1BUAfWI2d_m9zFMQ4C@&MD9` zO((YxXQR6|-7rJC2fl552z}3nz&ouMQ0E)+!l?qplvGGlz!Ok3yH6b67r{yKrt7L4kM%+F4W&_U6xFyKfO0o@jx?UpZHNdpKN*_yiTpf5LFB9$xujKYcFv zK8p=&h=dmlvRgChMJ@+u^&jV0)wub*c{_HJ^>{V$pLd?U7%)ihEX*Rabp7CGR|>2P zdJJ1d{;~@;NwbI7zW}uC7;HVC2zj$Tq4@Y6ko5Gm=y%9~|AK?yL-lmtLX#>ot5J#9 zGt@$4T0g_X@?28F_@LVQac)oN9UM+oCfe6}7=@=bu&K2NP1Ih&88@bpjec9O1k&ct z?2Ad=b}*u$aF6@jvc}&GgR1Z z!ke&Kg&QbtNoM#I?y|}~Vi2-jfG8`h#od7$QG-PT_VEvblTW9i$Sn&|+gJs@Uity8 za=V2G^!o8BOHoFn?UlL3`A6vI7BwQ$?|~i-Hsj~kK+-F&GW%ro@U%k_$WT3++B#~0 z4)fNOFh?TbiYr%i8DuXgMy^*>0`XgX^;8Haa}rVMi-NE#2??hFG8&Nzv0V$ za>P1D4ZPL+ag?VIY;6_-SCd{`KySc7b}`Vb?7@8TbfPvpWmEW^1S=Hq5;@-;fIl2Z 
z?D4EZB&1M_H@rTJZS5|RYmHanX61Z5Z@m^t)zKyWheU{ z&l-H!gYC>Hnd1f!OQP_jt8Yn!Enzxk__6SeFZkr&Ur1tq5UTlFQf(<*#*$VisQzLM zFWgN)mfP=h%Qbqdoyt z;%s2q+)9>C@;BQuG6ZujF9Ao@GWyDu>qLFG4~)B6!_br&yw7xpr542~F2V|C2W!C> z&fTHfo=3ixJwkgf`Xa0R3+PV8GiH;mChWK@fNbTtzSLh2^m>y3jc2bw?v^TWVdWf{ z(RCi_+)@P33>9Sd$qgw-wNS-JW$>PqU1*VDG>$-r;PS{e_{nfQls_?W^i(9;l-PiF zr>ufq-!C9LtuOdyq6i$C^@`Lt_<@htG?;F1lyt>b(}#*T)2|FGz@3lt^JxD@W@nw) zIWc`O79~z1VzWhVaSmIp$;v0zh?wVKa>H#3%j5r@G!pjZ#mE^X~>exu%o{RB4sBb zxL8>Nl081)WYuooTpCVWCMBcQhDXS&;bt_;xQi_m-UrPKnotmJkBk%cQw3b!PECZt zrtEXnI74gZa zSx5t3UE*d#ZT#%vm<~qMWI3}PS-@54#o%{bmDXJ8zuP(8D0 zQX!1B&;X$x&!Zc6=#uTpk`Uw+fknINL$7M*cRjFO+JAtcz7GSkS3S7S0$*5#*hsY@nUi=L^D2uiR^@uA>NJ1(|ss19z zn(rXbA_;cW=b4}z{DajA<tN^gx%r;km%#y*o5 z@r89+_*vUp;#wDo8YCa@TF2qY^PbQ$d03bpNfH*PLhm<&TcxmxgAMP zRS@RhZ(N}mM+enf;HcU_>^ED69Q1TTo=BNeNQlP2mShtgzZ@sN{fBct7UQ2v_GqT) z1}u8>7xw-fhMWB@QObIGh~hX$m5Vf(7Dst_alMDR^}d`QZ@G;0>QAtGW)4X2!W@fP zruT?T=scVw(Tez;bMYryAAd>skIhcO*neRqS~as6iyzIvdVijwV>aS2w&ny%sjbDw z%39DH=WJv>@)gh6+lR&-+PV8VmeoGzfcl9(BsJYdp$(77-+mY5SapXgl)HlR(v^u& ztxrwRhfL;JoD|GEavg09Nnw3Ce)wk2Nz|`g!}QVbsqj0E=zPH(h|u{91p>h&WWpQX zW!0j093$OOQ;U`Pe3>LU@X=P&ZX)N%5o}epiiwDo*3VA*9VG0p}#Cy0vB@rjaI)J=RHnQJZYHn4m#NOk2n8BMwc03f&u>b?C;)G} zBu!#tuENyrFIf409!?nwK|-x2M6tY+V>{~6xr)bOp~WopMbjFD=Vs!PrgbQPOp-K9 zyrTA4?WWq#=HT(4^N7ZV8ob_c2B8KMDd#uN>{eEToR+jB_f_T-9qrY%pF72o`SV(E z7M#GzEj?II>^0Je48vcKV_2>e%JM1kk=tt5AxJ#~whl#+u-B!mX+tShI`RSBb)Vv= zQp?Dmm?4}xM~0T%b(gqJPei75f!OJy6S~^&PiemrCOyI{NPmhV*|o%x@Fir^uC}gZ zThwW^@xW?0X{1hS8>B!YYYe|h3&+WdL-^dlTH<1ANZAnWwY(X$uIPXsB0$Cwg0w|_?9x}z5GGktGM zGhh!dk%o)=L&?T6~_>9y2Vy{ z>yUlMLhJthb5A>GCPN# zlP?ncKvjG%RW_mzG5lQZ{^J!!Z*vGTvG;=GInS9LJU-mBY%$*Z@)H`{MnjU47+m-| z58}7ns#V+&$XqVl3AbXzkfFUk@O{1x%O9&?Y3YsZyRZM@y3_nHbS)owD&I6;9oK*s zWvP=*Oa>Lm$7NB|f1@$(9^2ilhT=mK(Q_v!+!qvW-fjFHskppF`&ODj>`EVWygnPO zr@upMf^V4VD=op>#zs-cEETf8>NzFS^QHFpwYfMv!UJ#HK9h03B!iL;i$LJ7&49Lb zFmoK2gT_!P^|WmP9HCMuZ`u+8^ zi~}O9L0eab7I)c>w(W_5EUr`1V(Jg-tr2LW3XQIE^IM40gu-*Spu&HnW|DKfi{%fa 
zR!WTX*Cdmseipf?hNFyiUPL)43VL1cV(}C9B=`3tG*?7}5a)D!=Uga}=)8>XP(PUg zmn!rt+=V$@ZNfGLkXibr19{c4L-(?K0bFX+m4LbB@bS zc{HkehVq-$jWhU;6YT?;=%o+G#n2XFYgbd3PN%jkyK2&i*BsdT7mD ztB9V_=;6Xdv2cb9GQwAWAN+8h0I8?I6(>bk^B8dq4L9OOxrw_8H56cqaOlt)&;VtsgSr+_ab0Kk279bZ|59+r`>#xpv^?tb z6(ReVn38enIAr}P1^4wIL-&qfM3+Y1BXNz(Fvq8uIyhd0KJaU>KRG{L%!$oBmy}k_ za^2##eJ{YzfV*Pa-SCAg?zlN>1KMxy3et~WGE%G_u~9fjUT^z`HXOf%x`h`L+vKC5 z?`(nOyCsQar6Zaw(k4;+#F;do8|2=+I5x5B2cDZ31)GKo%>^DOvkiSxBzt)Y6=Pq? z@JpAmOE_2J2TvC7j^BdABaJC_o*pdul!VXqm7;*jF07CxP0n=QL6?1-(TAlbilSDbIJKuY_(;T`5|{@I&bd%+R@Y;;A6N^Z;TtB@%9nY{>v31j%hOD8f_>b zz7G!l7Y3qfg79Wy2TUkLf#n@9_OM$Q+Q1%x8He~$KezwXWE2dR*2743)lBsLh#Kcl zEn#<@UIx~u(&>tW(NJHo7t4mPqyE~QL@B*nD4SFFDUZ0-u&gGZ?Wl@|j~vr6>SQ^& zGP8)=sW=WNbWg(eee03b%2H(h?;)DW8$rRZEn$DsK_pwN$DZp6C13S4;ecy0RtR{A z-ag49FH4s|(Vblozh*52h6}Q4UACZ~u>zlM)Q5wA&!d0EPl?V*D5xxmhBaD7u=eI6 zwC>MJuv&48_#15F_WEDJupH;j)jJ4~q{WVjN+X5CL#;9p#3^3x=M0MB( zNz6S*jR-s9+*N(#R%tAjdiIQ|D-(gNJ#96E(luo1!#&6}`U+b5M*?8yW$N$fCd?<; zPFhQkq2%06@S9`BD!w?1HjOE9^ZAb`!v8KEP~(cSqtlS2R=@eza7`|wziXCT2CU-o z&3IXG0tyK~12w73>GRSV=(eT;tZSD=8mgCS?b797-{W&g*Y6g7Rlfu+T@b}isqwQv zVk0pQ9Y#yNC|LOBAGjK*quN`am_)$|^a(A3w(E;A4|1tP%csNP?baaAb0zBjYf;tE zM%3RiV1904GUc#Yzq-d}JNG=~+|xhi(lxdO<*f>5W@8VM?zG)HtPK%Fz-tvli zkhGgVGqWDg8sPTT{~bb$NIU#J=?J2MsSqr3p5F6^2X>m3a7DWS0w1|TO?p0U@!1<>2bnfNEY(Z@0#sET27S=7RmGUEnmaiu9uK;dn+`n zKLRV@7<21bh*9G_hCXsU^ZX8&6R;HAd7AM4r3G=h#^RXdXzq-A6nd)cxt!;2M#evA{t%S=P=d(3;R-$E`2R`nH9PJw11SDb?tD$m~ zE$=u+{Z=xi&wMN-!^4SCy!9p1#(9)SWWvFy@fJLt8;R{7@X^Z#+~L!|VDRRpg2DAy zuom5b&)P>xiBuizm5Kn7*#$&eb_xvMhtNw44?uBEIqbc1k2R5xN9w-uaAkG}JWD=8 zKR|nN(fS%N$x1cP@A*unTBt$C%T4IKz$G@Ypn%bmdy4+dS7b~c)iIU_e}W$#Mv1|~ z5K(={{8#@>oX|#Nx1+Hj_9Gn)Yn|cdPan8Fp;F{77|X{0RDsvBdr?097fnz~=wpf; zxU0?P9CiDt8(bD@_;3ilA2dM?Y6r=kM}DBHVFjt@pCbj44>)t%V=6aS3?8KO!Fc34 zXpOXmEky?@J*&V%Wv1!=K{D&nno6*L8BvE7w(nXTa&tdv+iX1-2< z)HY|>{*wi{lgcc_0A@-RcYxJy*oA(*B&S4lP zX90}I4p3K4>2i!*8mjkg25XU3a6Ho+F2yL3sVE=zhHofp`lAD9792oCwF{{li6L5y 
zg844{;M4CGYQvo)^r;nU>`aX_@Wf9Xg_R1y>nq}9NxB~#5BZ3Kmqx&*Q#nL?(jR2b zX5)2ct{`zclw&eJM;Ql}gUqlyH2&9uW_s(w^Y@G4N^B2WAWpMsGh(?+q7IKca@|2F;14mULV|Os@0-6 zj}Gmxr|5Z3Fy5dBRsYgp-Q!{OshG>!W1pe?A$u?uN<;BIA8`B25YW8l4RUT<3Dvs? zZ{&7%)=a2!{0CJym}&?Ywo8EFPbE59d?q6Px+G-#40zFB2Iey}(Apg?aK<7NW%otl zkNeuthmF4}S!M>*ACH3m+tI{rb|DN$uHp6uQX$oOKcL^8$hS)z-btmiZ)eGqht)^O zuYoW)e0?u;Hu|txqc6!tT}L`4IG9vy7GynTZ^A-jj>AVq;%PFq=v7e+_>PMMofUyU ztr#IZj;$oYF~vUguL7n1WN0uv0>ba#01;RXP1BuW^;A50#51Cavl~kJX+sV)r@+eZ zL!fS-io#at((?CB;L=hp+HL_tZx87~+|fm}_vkM8{%-}hcl#MFQ&WvstxyvA8&Vn_Y1K$%@LhSSy z@YIVTR24=8yTg%h&^46pybzRMufvGT%LH|L(SWWi7)m`t-Oe%a^kD&G`CuVQHw%R@ z$C>7NR|Mc96G>vUj*!vS{B-YIB`l;Xgqoh;AUXeZiQJ_O;_Kke#3jT+jc^{UKOzfB z@js9z_ggJn&*i9Qr(slz`wS+z@KJ6(q}X=BzX?0o`*AVwrPD;PDFBjrCg5LW4bIMa zFhfF{gzsyp?GrwPW54}F|1+@ z;nms#l)rokuAQAkM#uV@eP2$JXqRPVp;sW0RC|gS78IcY!QHr_Lz;+rPrx$u=XgTB z6QldaWQogGwm@z=?Rl~uUyIQt4K_pgvsn;9Sc2_)znsWRFCkTivT(cT57{Jfig+q< zojc(z%)2#1%#8i}Z~@hV|K{>)5AGGk^M0LR(_crzt*iw6b=MNq+9*l7b5&XUb)h(5 z6+byT7*4hoegucP3HU;(6SICzBTn$8uys!c?D%C@Yjkiqx$NGBrBlOP}8D=QpuqJaf_Lq~+ryB5IYx~JGXzLFg^;o+l7KKRf1F7^R01`?NPz>ClZ zII}jCHkM6>{V%UTSVunC;%ks{TNUWt6@Wj~g55C(=-9jn)Wx2`_cPx?@z{A-+CK+G zx);#lZ82O<>IrVAd9Z#gk-kC&Gd+i&L6FG>XcblgW$7@W>nFkf=Q+^x;IcLAncS2i zljL&ur%><;2>%xf-}lJil#W{=IKP*18D3z7Qd0HjhDg@HY5}CDPYNEMc*^2att15RvB(+>Q{m zO_aik@OWx_OA?6{l%_S7T7$;KT_)7Zf7RRXgMu; z&6suEa0n8rwQ1hsdidCsMbzXOG-B!v*%sH}bmS(M@0woqS(8m57o0;3UvM43fM2yw z8ZFVHT4VZ7YBX{>c?x$4{y>I)3vturUg92}g(A{EQ5Req@d5w0sC0t~s83&tpDepa zLB)Giwpg9E7t6%<2cCiF>}habc#5npoMQBxa~X$Mf{^)25Hb2!DHDq)B-MfQm-0v9 zI^#2Bljt+ZJ+Fb*TWX=d*8(VAjf2?edJ|4fqt|;8 z-G38L8ThoBpPR}@;#Xzh&6og$ehI^KCl909($|sXz6v~S?TIHGuaXL%672eFHCiGz zigjYI)SjETit}cM!=v^VMD5wb{nvX!RB#M))u@Ox_Rqp1bPw~2>mIb`gu>}r7Qojc z4PKn{)yG~Q*?R@n*tI1vlBOj%bJaL((GN%2MzXN}+I4()$9(4bD43`7JEGUNKiNxX zhp8m%Pnavc(oMRy{3oIU0+psrWRyDl@udt3;*^pNVHFH@aRAnYwo$9oi}9{T zWm1TG31#66uJhr31Qz2x@(IDcn;wU6@T-NOu<`b&lgoqYT;a`pvQO(z;tHxfT zncR+;TF7itvtcdKpC(Lq`@g{^rWDz4e4OO#htjDt>ahE89mh}1!`m#oapIcUyk2u3 
zqJPVfWa`F{(vH2P?b=R~CmjS2*JqQ32j0QdGD$RQhDgrVOxBJPfrA6WJllI+gwL}Z zJS_KN+vF&iAK*xCbo~W~KLhxiV;He5Od+cZzrkgV4dm|rG=l8X$hD~u;!kN1)5FoE zQn?(867+R$iU?mm!)J_vD@1h81 zJNgisE{hA^XA!xDVT?z>U-V)nAI)D@h=X$y>3_ZZu=<;s$b2D<#(h#zy%FbBYq&sN z*kM4i55rh#trJK*vIWo5=_27alPIBUDOy@1Yvw)1%?VasN4Mu+Ks~4LV~t)O`k~xS zeJ;v@M;n*&l+N78YSjbSZ01(v;WtVyfFK+%RD_@DznN2ss;p_M1Z;cyiV?0h#XPeZ zWIt5Ns=Qr>1PxA7GA&-zH?3T1k$Nc;K<^^6x0$kb(sryazddtQC>%L$FrlLtNI`>- z1);r^sKCF%SnNzHD?ZN=ok5SlVZIjO8&QDfRW2yVM+r(>&Jn+hQY@d08c1(?ghR%R zu%gnH+RrJw(NiyT;-s(=A?HM7e4n7K096)F=hF)kIgi_f5`59_WgocP;hep}JZ15x zSoNg>@QryusD1VJ7xa{?R*u1bN5L>{<$;kf8IPs zeuEocuze$n%Kb#y;>Y;yqIPtkaue>#5`*H1G6|#{ahZe`}Gj_Ye_(&qd5N7Cql+Qj*}CYJV8J4J~q$_ zM%ERJiJ6cw@4f#zy1XfZ_F(eK-Bk-wi9E7S*AI>u#vsKi~XJog|ca&qk(C z<>*D7HK@|HA33$n2kXRxsLtX#CyiP}S9?d83k+4@y3h8=+4wo^T=R#$JkN^#5~qq@ zUdmv1Ik%vQ+m7t>ydu0jdMl~*io}Db60o{_36a(iB@>qO$#yR*^1!N|ZMAYBj@_0_ zzw0hMw|oH!+;)*3ealB44mRS7VhU$$S0Eb42F;_qqz!f-mv2>hR53E^Tus(B6|+CKv=b-aRixBx7fWSdfJ&)7JcE$$Nc!bmj`y+y1O|b8 zD|m|q<}V-@od1&K-i`Fidk!RUbC8*CWi%yfB2H9#w5cyC@1XI86gju+7$dsefE8-G zghmeQ(y5N8NLG#~i6X1%mTgxl6feo<^(vAJ@4Z>&>N~^SklxKPT|~*2ARd`)yRD zHw!8C&?w!v40$bFM-(jOV1fNF5aTnZH60J(3ySx!!}HZh<(LV!@{pl;=0(hxioIN3 zAj9rj6$+gprtm;E5!EjqMxDYj)YKITMkU)AE&Dc#)$g_ARKG0p$n_v7TBl$MFFUeW ziaW}GGDNmTUtc%J9=25`b=EB@NK(16pnpI!50D{ z!8H^7HP=I+)*WP~GM(#{a6XCAZ~`9F0KO-)TNSo|`L0iBFe(r(&+-NRJ`ae!Do&fI zOCqOWUuah@MIY}^BaPdSLz8kSs1D~a2Znm_n@8p7PCzhvED;Eov{S%#PBf~hpH3&q zd_lH*B4F-F1gw>qM)KPmP{kC3mb@|qGq))4X!YXw(|ggzItuL9ow4kA+bZ_I+x?_dVlNu~DMm+}m_`n+GG&JplR#E23StWzQFmGv z$0*-T*Rk@<;>>xqRfFz~>(48!w!S4v1zi-O$IT%0+8CE4U=O~X!l(CDG1f-Y$@OLf zGWREUO)y0)t0q7c<$Lfe*Q3O=@YHA-`(E9d1v zidY0oyr+;#(z)86*;821Opuj#<@Sgdc4Cp~W4Pq;2tK*il>J_vO~w!DlY`w4u;v>9 zQhz~@gs3*-KPmG&$@s?y3$ml> z0UG2FMs*&dVDv%`{oYlHiVU2Xj>nlO|CK4;;Fm)Be_lh>zaYjpyA(}pHniA%UJHSO z5Av*9jTO9NnA^2~DSLtSSYs#%506+^izLh>$=ifUS!X4hx+DZT^XI|N^P!jyv4(q5 zADJD~k0M7wZJ03XVUv!E@B~+tQ1A9z!Q$@}?*DrUg+=G08|CwGudp?oUtq&hXJx?V z$s}47pTIDDi{aje+pLY(J_vi$i54_uBdyGL=1k#yylvJeD$w;LX?k%9zMq^zJ*`)% 
zdWj3w@0IcMIVr*?IG&ooy&-$?=q=6pQzts&Cr=qgfDK5VtdXz^TJjFJejS8 zeQlhv{#_$_L~#wMJl|N`VJv~OE=Djed5Y-NiL1Ey{xcN(NS#O~oMwHsT=B$GO|oms z8m&5>&SkIu^?04buIahUnY;^_7iE)_|qPx`}L#3OO;gHxE}K@?+-b_ zpNJlmar@pPSunyA!oNP2Vx`AhYGYU2LS}DhviVdG?o0ZN#kr^CjRP`p%IXaYm6=Vw zkK6-yVw&;gg^g&Rq#q!Wghz!v(d`? z$-cwYghO;&FK>Kc)xmtM6mme)mA; z_fB%THXjBaWa7rRwd`;J$FeEZp`|qxQQQFh_JI=lzTV(d}F>|GFIQlN%1J^4GxDpPZvN z_XX6p-UYjCu7BQ7KL z!*vR$q5Ve$bY2XEfJ6OfbW*JuPAJvD zDeb$=r6GS>pPT*sRJ9QQgzr}p$&a2%BJ;kJ{O8|F&-t7Ow_0k*q46^$sXd8=-SHzihfdJbIxdir zM-Pa`+Gs|9^f82pJ>d4iLdo0OW03x<49z>8NKRk#<92XX6O*oIWQR>JVG6Fp3XOZD zO2P(oWXp+n!)-{MSxLGjHj|R`7s=Tv6;`rs8p%D6a5xzVze4h&%V9wq%PyM$!-~^Gm0v zPuwJHxkuPv%PzP~tFp7?2FQbAD_&{qN%B$ZIBV!LguGHT={pO(Ny-g1R=YU?LFz6- zk8VKSDn{5w)s(F5osBntS;7{qxrycWI@RVx{KTbOQn9p94VnB;jhs+>g~jySD5Vd2 zr1YTynciPTpXjK=JOTDGT*eLeSvdGp95o;yp9c+Bu* zCT-)W^lh#9ow6i3`GKF$9<=b}Bd?QD@_AwP4?cOc;C|iyiOHL4%hiiBww_ z9T6SGPTWcYn@efn6`v0LFSJR+@jdY5!Fk+SA4t4LIEIQxA#2|72~F(UNen(u;4(u| z(!AS|yw$KGKHqk+#SPPNvxy;-rE!Gq^`B&6dMh3)KTqB*e#zX`4`yHN5+!=xoTtj@ z7WG-|0+C6H#HSKh;`@9qsL62-sSPa0=Vjw?--=LFk`scv&L$J>uv(nk@&VWUu7kwa zVWi@A4vCj7AU+%us(NuMd9T1?*sw%I5D-lx*svF{fr#AhJFK7rMvdL5p#CH=Vl1GN zU-&&BCKv(p$M2tew{K==cILY~pI3I~#c0yIN%>^u@t;Uo(M59Oxfk6yH=2svZj+I* zOXznm#5AGi1O$bAPla`TX^hu0S}-q(f7hf<-y|8)kcV6N+!O)z5zVHyO9H9)=|b|P zmjz!RygZfO6g@_h0*2Ay)lKMexG&9b z^rXeVI?@H&Db&CJM&f@UgQ}-{Qqibf8uCsBPx>kcp8m0xMwG3G>AknpT%$vLO@F?S zg7;XsIZcI|Tt6Hf_`Hur%_~u8Rv|>V4+A)nL#s{=r1k?m>HbwZU@L4wSykVY1Fz4K zoPIV`5&4V^OK_v=YbFD7GN&^qvecf>nNa>*OYheng>))|ZZ2gcp+1t1uT_Or->Tv- zssicTeU9V+pNwr$B&T_MD(G#K0bEV5WGT7A5)A9x7G5lvA#(hJ(EhK;;h? 
zsK&b*^h)jmCdWgePn0M z$xxm>2`Vp4gM>JDv`wAg)Az$vV)GP(tjv-#*}WMtYR&MZN)PN^|DXbh+7S-F$Qfk4yb?Leb>ZImP?(XjnR_POk0$O>rCH71bZPu0 z6wol1)12oIp85L9RJBo9t)!ZiP4woroEV7BJdC&}&-srR@cUr)I!uKzkqk;sGs78w z*un+*A}Btv3~Vlqg~#8dit9(|DpID}qcszPK$MzPs=VZlc9Ts9uw z=v2{HX#q%M$3l)N&K9q){fe6=)TVl)zZ|U8Dx`i@s`Sf2c_=zGi}YO>LcY$ZP&gQ@ zM&@N>kXq{Y(ntOITpY`-sD6PNowzg;>7=u?*j|U0N|Rx?p*nqO@Emok)9ywf&UUB=$*!pA$Qw<=q_FO@SfAJz}&wGj8mPC@7 zH}}z}mVvOrYAu@HGn{6*EJUO48F6WIOrfv*3aOlX8?>xf5;buH(e{`}@4;M{Gk_8M z7uA#dI*I7g$xOv4elJ^8<~)cFa~EIRTSuDa`;+Encf<|dYtZ6k7Wcdpsqmh8mpD0P z^0_)1oIs?@EgOAZ@nSvy48n2(=hXW#8vUvt9o(=R^|<*tm-R3fp-YaW+s_Zl@~zjz z4?SihmklgfG})3O{+lCfH}G<7`1(qA2y(Z3(+%P*Gqsze)eHZ-3Gy+Pn}>L5|g zI8B{r96^(vzJq-^KcK!#|y?zPxrdma6pn`X?{Mp1-$6y*PnALTo2CgoEgNNoI~&M@1y3<=uUe__>-QmW4M}e zqey;k5ejn1Ahz9DOoLe*G1L-gx+B`N(g3?I6XN+L##qh>@w!Z7Sdwjz1-A@w+I@3e zu}g?6hFIe5=IPYxyb1Q3Xn{r1hTQsXrdar@iBvbT_)f!GJZ4rJXL;8cTL&57b$O=v z;uK@NXU2DQ+f*U$f6x@;7nz8sKA!*DYUD`f!B_zUq9Eoc1xp2SUxU+6)tXX0|I8-Lk-59=eK~)qGu_ zTr<3?cL;gqV2yt|WQfIfhB#<}HP$_9jw`P;kwN3ssonX-)Nr;I#lB|P*l#mkU9c5y zo#+qDNg=Gfv6E(n9TA_Ywd6kkK8I``mr9orYaDo&#VdXj;>$~lQQeMQ`i{@0(#NJ$A&%c6a12;j|vMNX&vRdijdlicRn+ywSIP9Fj*ZSL? 
zz!^U%2BRChz}>zaB7%#+_|`#?=AGm&85F@Gqi}E?8ldDK`NQ@I6QwR6|F14lEh~6C zf=h@hfee#9ppe9G^yHzmcq733w-_g<^H6eB1 zJ$NL|gt2i4pE-UmzQx%wpV2)oE#m7Irrm5*R&TriMcI?5FO zyIeWm$4iUzr-9zlXt3NIsI+yILdm>T5UK*5=W~N*|z)cAfBV}%uI#=_%lhQ(4U72_HIcK}6nQp263Vf0a!H+zE89r{x z$SQT^Ykprb^EGRmDkjB-a1qMY@SoFz?^$igMFV;Eh5fMsMcQSlP_+(f=#G{Xoq+3q|;yfl^( z2pCo)B1R(PHEVUQS^GoHI-P0OZLhgSlU_Sb`hRIM=v))~p(ex5G#Rzmq{65$mP|L^ zE>$=Fz_6a&K81m0R~T4*j6tXKTT5Ww9)o{3*~8e0ANfhQ)lcE>U19K^*p;_J z*^rN9x1r0+?i7VKQde@@P~g`^%PQg5g?lS1?c1#=*tA2z_ERX>b%jFDk5TA!?Q03_ z+oSOB0zu0xQf;$Gt;L>}NYG}bL)+QU{yy7Ad;janXbO6<-Q)>!Ni6@@k?j_lFgIS# zhgqlgAxKBy+&%Y@H}51%z(k{Hzztowc_#kIYZAa>M8__mG2 z-1D!AzMzn|Jf3W)|^Z)<= literal 0 HcmV?d00001 diff --git a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..c20d8afabf383430e807fde58270f4ff7c92fdc5 GIT binary patch literal 398 zcmbQn@*$OhfsKPviiv}fK}I0n*~8V@JHX%5FT~Nw$2H#H$;;I_B;Gy9F~Ea^U5G(p z%3@ZHEBvOMEJwl_O!%`?ixP|D6ALo+b5l!-GLwt-4fK=qOY=(f^&CS&f;^oGtXXkj%{$(r+6K}d<; zP(QgZje%hTs|n91pMdA{fb1StqZ*47qNWTcA~}hbsYUVSnI#$V<*AwJ870NK2Kq^v ziN$4W7!p{7m^e}t9?W6Yn80lMt3U99sG=CqrFp4UPZ%ONg_t?6F@jWR09BZFzuEp# zSW(z;RuKaSqe27g9*HtPHz83)iM~Pxph%7cP$i?nhxx2)b@qRrx`qJ=7(uw8ao InputsDir = unittest::getInputFileDirectory(TestMainArgv0); + llvm::sys::path::append(InputsDir, "ir2native_x86_64_model"); + return std::string(InputsDir); +} + +// Test observable behavior when no model is provided. +TEST(TFUtilsTest, NoModel) { + TFModelEvaluator Evaluator("", {}, {}); + EXPECT_FALSE(Evaluator.isValid()); +} + +// Test we can correctly load a savedmodel and evaluate it. +TEST(TFUtilsTest, LoadAndExecuteTest) { + // We use the ir2native model for test. 
We know it has one feature of + // dimension (1, 214) + std::vector InputNames{"serving_default_input_1"}; + std::vector OutputName{"StatefulPartitionedCall"}; + const static int64_t KnownSize = 214; + + TFModelEvaluator Evaluator(getModelPath(), InputNames, OutputName); + static const std::vector Dim{1, KnownSize}; + + EXPECT_TRUE(Evaluator.isValid()); + Evaluator.initInput(0, TF_INT32, Dim); + + int32_t *V = static_cast(TF_TensorData(Evaluator.getInput()[0])); + // Fill it up with 1's, we know the output. + for (auto I = 0; I < KnownSize; ++I) { + V[I] = 1; + } + { + auto ER = Evaluator.evaluate(); + EXPECT_TRUE(ER.hasValue()); + float Ret = *ER->getTensorValue(0); + EXPECT_EQ(static_cast(Ret), 80); + } + // The input vector should be unchanged + for (auto I = 0; I < KnownSize; ++I) { + EXPECT_EQ(V[I], 1); + } + // Zero-out the unused position '0' of the instruction histogram, which is + // after the first 9 calculated values. Should the the same result. + V[9] = 0; + { + auto ER = Evaluator.evaluate(); + EXPECT_TRUE(ER.hasValue()); + float Ret = *ER->getTensorValue(0); + EXPECT_EQ(static_cast(Ret), 80); + } +} + +// Test incorrect input setup +TEST(TFUtilsTest, EvalError) { + // We use the ir2native model for test. We know it has one feature of + // dimension (1, 214) + std::vector InputNames{"serving_default_input_1"}; + std::vector OutputName{"StatefulPartitionedCall"}; + const static int64_t KnownSize = 213; + + TFModelEvaluator Evaluator(getModelPath(), InputNames, OutputName); + static const std::vector Dim{1, KnownSize}; + + EXPECT_TRUE(Evaluator.isValid()); + Evaluator.initInput(0, TF_INT32, Dim); + + int32_t *V = static_cast(TF_TensorData(Evaluator.getInput()[0])); + // Fill it up with 1's, we know the output. 
+ for (auto I = 0; I < KnownSize; ++I) { + V[I] = 1; + } + auto ER = Evaluator.evaluate(); + EXPECT_FALSE(ER.hasValue()); + EXPECT_FALSE(Evaluator.isValid()); +} From a1fc26030a42e9639e678344a4c08014a8cbba3d Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sun, 12 Jul 2020 21:49:17 -0700 Subject: [PATCH 119/771] [JITLink] Add a synchronous version of finalize for convenience. This will be used by upcoming patches that implement indirection utils (reentry, reentry trampolines, and stubs) on top of JITLinkMemoryManager to unify in-process and cross-process lazy compilation support. --- .../ExecutionEngine/JITLink/JITLinkMemoryManager.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index ac5a593bb77ba..3bb56f4d96158 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -17,7 +17,9 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/Support/Error.h" #include "llvm/Support/Memory.h" + #include +#include namespace llvm { namespace jitlink { @@ -74,6 +76,15 @@ class JITLinkMemoryManager { /// working memory. virtual void finalizeAsync(FinalizeContinuation OnFinalize) = 0; + /// Calls finalizeAsync and waits for completion. + Error finalize() { + std::promise FinalizeResultP; + auto FinalizeResultF = FinalizeResultP.get_future(); + finalizeAsync( + [&](Error Err) { FinalizeResultP.set_value(std::move(Err)); }); + return FinalizeResultF.get(); + } + /// Should deallocate target memory. virtual Error deallocate() = 0; }; From fb7ef0bb0b9c6964387391a3e1759c0a3320df87 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sun, 12 Jul 2020 21:56:45 -0700 Subject: [PATCH 120/771] [ORC] Generalize emit re-entry, stub, etc. APIs for working addr != link addr. 
This patch generalizes the APIs for writing re-entry blocks, trampolines and stubs to allow their final linked address to differ from the address of their initial working memory. This will allow these routines to be used with JITLinkMemoryManagers, which will in turn allow for unification of code paths for in-process and cross-process lazy JITing. --- .../ExecutionEngine/Orc/IndirectionUtils.h | 91 ++- .../llvm/ExecutionEngine/Orc/LazyReexports.h | 8 +- .../llvm/ExecutionEngine/Orc/OrcABISupport.h | 356 +++++----- .../Orc/OrcRemoteTargetServer.h | 42 +- .../lib/ExecutionEngine/Orc/OrcABISupport.cpp | 641 ++++++++---------- 5 files changed, 575 insertions(+), 563 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index b3e2bddd716bb..e0cfd8bf24099 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -17,6 +17,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/ExecutionEngine/Orc/Core.h" +#include "llvm/ExecutionEngine/Orc/OrcABISupport.h" #include "llvm/Support/Error.h" #include "llvm/Support/Memory.h" #include "llvm/Support/Process.h" @@ -139,8 +140,10 @@ template class LocalTrampolinePool : public TrampolinePool { return; } - ORCABI::writeResolverCode(static_cast(ResolverBlock.base()), - &reenter, this); + ORCABI::writeResolverCode(static_cast(ResolverBlock.base()), + pointerToJITTargetAddress(ResolverBlock.base()), + pointerToJITTargetAddress(&reenter), + pointerToJITTargetAddress(this)); EC = sys::Memory::protectMappedMemory(ResolverBlock.getMemoryBlock(), sys::Memory::MF_READ | @@ -166,14 +169,14 @@ template class LocalTrampolinePool : public TrampolinePool { (sys::Process::getPageSizeEstimate() - ORCABI::PointerSize) / ORCABI::TrampolineSize; - uint8_t *TrampolineMem = static_cast(TrampolineBlock.base()); - 
ORCABI::writeTrampolines(TrampolineMem, ResolverBlock.base(), - NumTrampolines); + char *TrampolineMem = static_cast(TrampolineBlock.base()); + ORCABI::writeTrampolines( + TrampolineMem, pointerToJITTargetAddress(TrampolineMem), + pointerToJITTargetAddress(ResolverBlock.base()), NumTrampolines); for (unsigned I = 0; I < NumTrampolines; ++I) - this->AvailableTrampolines.push_back( - static_cast(reinterpret_cast( - TrampolineMem + (I * ORCABI::TrampolineSize)))); + this->AvailableTrampolines.push_back(pointerToJITTargetAddress( + TrampolineMem + (I * ORCABI::TrampolineSize))); if (auto EC = sys::Memory::protectMappedMemory( TrampolineBlock.getMemoryBlock(), @@ -302,6 +305,61 @@ class IndirectStubsManager { virtual void anchor(); }; +template class LocalIndirectStubsInfo { +public: + LocalIndirectStubsInfo(unsigned NumStubs, sys::OwningMemoryBlock StubsMem) + : NumStubs(NumStubs), StubsMem(std::move(StubsMem)) {} + + static Expected create(unsigned MinStubs, + unsigned PageSize) { + auto ISAS = getIndirectStubsBlockSizes(MinStubs, PageSize); + + assert((ISAS.StubBytes % PageSize == 0) && + "StubBytes is not a page size multiple"); + uint64_t PointerAlloc = alignTo(ISAS.PointerBytes, PageSize); + + // Allocate memory for stubs and pointers in one call. 
+ std::error_code EC; + auto StubsAndPtrsMem = + sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( + ISAS.StubBytes + PointerAlloc, nullptr, + sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); + if (EC) + return errorCodeToError(EC); + + sys::MemoryBlock StubsBlock(StubsAndPtrsMem.base(), ISAS.StubBytes); + auto StubsBlockMem = static_cast(StubsAndPtrsMem.base()); + auto PtrBlockAddress = + pointerToJITTargetAddress(StubsBlockMem) + ISAS.StubBytes; + + ORCABI::writeIndirectStubsBlock(StubsBlockMem, + pointerToJITTargetAddress(StubsBlockMem), + PtrBlockAddress, ISAS.NumStubs); + + if (auto EC = sys::Memory::protectMappedMemory( + StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) + return errorCodeToError(EC); + + return LocalIndirectStubsInfo(ISAS.NumStubs, std::move(StubsAndPtrsMem)); + } + + unsigned getNumStubs() const { return NumStubs; } + + void *getStub(unsigned Idx) const { + return static_cast(StubsMem.base()) + Idx * ORCABI::StubSize; + } + + void **getPtr(unsigned Idx) const { + char *PtrsBase = + static_cast(StubsMem.base()) + NumStubs * ORCABI::StubSize; + return reinterpret_cast(PtrsBase) + Idx; + } + +private: + unsigned NumStubs = 0; + sys::OwningMemoryBlock StubsMem; +}; + /// IndirectStubsManager implementation for the host architecture, e.g. /// OrcX86_64. (See OrcArchitectureSupport.h). 
template @@ -379,13 +437,13 @@ class LocalIndirectStubsManager : public IndirectStubsManager { unsigned NewStubsRequired = NumStubs - FreeStubs.size(); unsigned NewBlockId = IndirectStubsInfos.size(); - typename TargetT::IndirectStubsInfo ISI; - if (auto Err = - TargetT::emitIndirectStubsBlock(ISI, NewStubsRequired, nullptr)) - return Err; - for (unsigned I = 0; I < ISI.getNumStubs(); ++I) + auto ISI = + LocalIndirectStubsInfo::create(NewStubsRequired, PageSize); + if (!ISI) + return ISI.takeError(); + for (unsigned I = 0; I < ISI->getNumStubs(); ++I) FreeStubs.push_back(std::make_pair(NewBlockId, I)); - IndirectStubsInfos.push_back(std::move(ISI)); + IndirectStubsInfos.push_back(std::move(*ISI)); return Error::success(); } @@ -394,12 +452,13 @@ class LocalIndirectStubsManager : public IndirectStubsManager { auto Key = FreeStubs.back(); FreeStubs.pop_back(); *IndirectStubsInfos[Key.first].getPtr(Key.second) = - reinterpret_cast(static_cast(InitAddr)); + jitTargetAddressToPointer(InitAddr); StubIndexes[StubName] = std::make_pair(Key, StubFlags); } + unsigned PageSize = sys::Process::getPageSizeEstimate(); std::mutex StubsMutex; - std::vector IndirectStubsInfos; + std::vector> IndirectStubsInfos; using StubKey = std::pair; std::vector FreeStubs; StringMap> StubIndexes; diff --git a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h index 01a2b9712e9a4..7972ed4300487 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/LazyReexports.h @@ -46,6 +46,10 @@ class LazyCallThroughManager { getCallThroughTrampoline(JITDylib &SourceJD, SymbolStringPtr SymbolName, NotifyResolvedFunction NotifyResolved); + void resolveTrampolineLandingAddress( + JITTargetAddress TrampolineAddr, + TrampolinePool::NotifyLandingResolvedFunction NotifyLandingResolved); + protected: using NotifyLandingResolvedFunction = TrampolinePool::NotifyLandingResolvedFunction; @@ -63,10 +67,6 @@ 
class LazyCallThroughManager { Expected findReexport(JITTargetAddress TrampolineAddr); Error notifyResolved(JITTargetAddress TrampolineAddr, JITTargetAddress ResolvedAddr); - void resolveTrampolineLandingAddress( - JITTargetAddress TrampolineAddr, - NotifyLandingResolvedFunction NotifyLandingResolved); - void setTrampolinePool(std::unique_ptr TP) { this->TP = std::move(TP); } diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h index 2e58ddd75d318..a41d4b0777f82 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h @@ -20,13 +20,34 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Memory.h" +#include "llvm/Support/MathExtras.h" #include #include +#include namespace llvm { namespace orc { +struct IndirectStubsAllocationSizes { + uint64_t StubBytes = 0; + uint64_t PointerBytes = 0; + unsigned NumStubs = 0; +}; + +template +IndirectStubsAllocationSizes +getIndirectStubsBlockSizes(unsigned MinStubs, unsigned RoundToMultipleOf = 0) { + assert( + (RoundToMultipleOf == 0 || (RoundToMultipleOf % ORCABI::StubSize == 0)) && + "RoundToMultipleOf is not a multiple of stub size"); + uint64_t StubBytes = MinStubs * ORCABI::StubSize; + if (RoundToMultipleOf) + StubBytes = alignTo(StubBytes, RoundToMultipleOf); + unsigned NumStubs = StubBytes / ORCABI::StubSize; + uint64_t PointerBytes = NumStubs * ORCABI::PointerSize; + return {StubBytes, PointerBytes, NumStubs}; +} + /// Generic ORC ABI support. /// /// This class can be substituted as the target architecture support class for @@ -35,113 +56,72 @@ namespace orc { /// will result in execution of an llvm_unreachable. 
class OrcGenericABI { public: - static const unsigned PointerSize = sizeof(uintptr_t); - static const unsigned TrampolineSize = 1; - static const unsigned ResolverCodeSize = 1; - - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); - - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, - void *CallbackMgr) { + static constexpr unsigned PointerSize = sizeof(uintptr_t); + static constexpr unsigned TrampolineSize = 1; + static constexpr unsigned StubSize = 1; + static constexpr unsigned StubToPointerMaxDisplacement = 1; + static constexpr unsigned ResolverCodeSize = 1; + + static void writeResolverCode(char *ResolveWorkingMem, + JITTargetAddress ResolverTargetAddr, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { llvm_unreachable("writeResolverCode is not supported by the generic host " "support class"); } - static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTragetAddr, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { llvm_unreachable("writeTrampolines is not supported by the generic host " "support class"); } - class IndirectStubsInfo { - public: - const static unsigned StubSize = 1; - - unsigned getNumStubs() const { llvm_unreachable("Not supported"); } - void *getStub(unsigned Idx) const { llvm_unreachable("Not supported"); } - void **getPtr(unsigned Idx) const { llvm_unreachable("Not supported"); } - }; - - static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, void *InitialPtrVal) { - llvm_unreachable("emitIndirectStubsBlock is not supported by the generic " - "host support class"); - } -}; - -/// Provide information about stub blocks generated by the -/// makeIndirectStubsBlock function. 
-template class GenericIndirectStubsInfo { -public: - const static unsigned StubSize = StubSizeVal; - - GenericIndirectStubsInfo() = default; - GenericIndirectStubsInfo(unsigned NumStubs, sys::OwningMemoryBlock StubsMem) - : NumStubs(NumStubs), StubsMem(std::move(StubsMem)) {} - GenericIndirectStubsInfo(GenericIndirectStubsInfo &&Other) - : NumStubs(Other.NumStubs), StubsMem(std::move(Other.StubsMem)) { - Other.NumStubs = 0; - } - - GenericIndirectStubsInfo &operator=(GenericIndirectStubsInfo &&Other) { - NumStubs = Other.NumStubs; - Other.NumStubs = 0; - StubsMem = std::move(Other.StubsMem); - return *this; - } - - /// Number of stubs in this block. - unsigned getNumStubs() const { return NumStubs; } - - /// Get a pointer to the stub at the given index, which must be in - /// the range 0 .. getNumStubs() - 1. - void *getStub(unsigned Idx) const { - return static_cast(StubsMem.base()) + Idx * StubSize; + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { + llvm_unreachable( + "writeIndirectStubsBlock is not supported by the generic host " + "support class"); } - - /// Get a pointer to the implementation-pointer at the given index, - /// which must be in the range 0 .. getNumStubs() - 1. 
- void **getPtr(unsigned Idx) const { - char *PtrsBase = static_cast(StubsMem.base()) + NumStubs * StubSize; - return reinterpret_cast(PtrsBase) + Idx; - } - -private: - unsigned NumStubs = 0; - sys::OwningMemoryBlock StubsMem; }; class OrcAArch64 { public: - static const unsigned PointerSize = 8; - static const unsigned TrampolineSize = 12; - static const unsigned ResolverCodeSize = 0x120; - - using IndirectStubsInfo = GenericIndirectStubsInfo<8>; - - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); + static constexpr unsigned PointerSize = 8; + static constexpr unsigned TrampolineSize = 12; + static constexpr unsigned StubSize = 8; + static constexpr unsigned StubToPointerMaxDisplacement = 1U << 27; + static constexpr unsigned ResolverCodeSize = 0x120; /// Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, - void *CallbackMgr); + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress RentryCtxAddr); /// Write the requested number of trampolines into the given memory, /// which must be big enough to hold 1 pointer, plus NumTrampolines /// trampolines. - static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines); - /// Emit at least MinStubs worth of indirect call stubs, rounded out to - /// the nearest page size. - /// - /// E.g. 
Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k - /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 - /// will return a block of 1024 (2-pages worth). - static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, void *InitialPtrVal); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned MinStubs); }; /// X86_64 code that's common to all ABIs. @@ -149,25 +129,26 @@ class OrcAArch64 { /// X86_64 supports lazy JITing. class OrcX86_64_Base { public: - static const unsigned PointerSize = 8; - static const unsigned TrampolineSize = 8; - - using IndirectStubsInfo = GenericIndirectStubsInfo<8>; + static constexpr unsigned PointerSize = 8; + static constexpr unsigned TrampolineSize = 8; + static constexpr unsigned StubSize = 8; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; /// Write the requested number of trampolines into the given memory, /// which must be big enough to hold 1 pointer, plus NumTrampolines /// trampolines. - static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines); - /// Emit at least MinStubs worth of indirect call stubs, rounded out to - /// the nearest page size. - /// - /// E.g. Asking for 4 stubs on x86-64, where stubs are 8-bytes, with 4k - /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 - /// will return a block of 1024 (2-pages worth). 
- static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, void *InitialPtrVal); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); }; /// X86_64 support for SysV ABI (Linux, MacOSX). @@ -175,15 +156,19 @@ class OrcX86_64_Base { /// X86_64_SysV supports lazy JITing. class OrcX86_64_SysV : public OrcX86_64_Base { public: - static const unsigned ResolverCodeSize = 0x6C; - - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); + static constexpr unsigned ResolverCodeSize = 0x6C; /// Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, - void *CallbackMgr); + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); }; /// X86_64 support for Win32. @@ -191,15 +176,19 @@ class OrcX86_64_SysV : public OrcX86_64_Base { /// X86_64_Win32 supports lazy JITing. 
class OrcX86_64_Win32 : public OrcX86_64_Base { public: - static const unsigned ResolverCodeSize = 0x74; - - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); + static constexpr unsigned ResolverCodeSize = 0x74; /// Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, - void *CallbackMgr); + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); }; /// I386 support. @@ -207,34 +196,39 @@ class OrcX86_64_Win32 : public OrcX86_64_Base { /// I386 supports lazy JITing. class OrcI386 { public: - static const unsigned PointerSize = 4; - static const unsigned TrampolineSize = 8; - static const unsigned ResolverCodeSize = 0x4a; - - using IndirectStubsInfo = GenericIndirectStubsInfo<8>; - - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); + static constexpr unsigned PointerSize = 4; + static constexpr unsigned TrampolineSize = 8; + static constexpr unsigned StubSize = 8; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; + static constexpr unsigned ResolverCodeSize = 0x4a; /// Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry, - void *CallbackMgr); + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). 
The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); /// Write the requested number of trampolines into the given memory, /// which must be big enough to hold 1 pointer, plus NumTrampolines /// trampolines. - static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines); - /// Emit at least MinStubs worth of indirect call stubs, rounded out to - /// the nearest page size. - /// - /// E.g. Asking for 4 stubs on i386, where stubs are 8-bytes, with 4k - /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 - /// will return a block of 1024 (2-pages worth). - static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, void *InitialPtrVal); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); }; // @brief Mips32 support. @@ -242,41 +236,61 @@ class OrcI386 { // Mips32 supports lazy JITing. 
class OrcMips32_Base { public: - static const unsigned PointerSize = 4; - static const unsigned TrampolineSize = 20; - static const unsigned ResolverCodeSize = 0xfc; - using IndirectStubsInfo = GenericIndirectStubsInfo<16>; + static constexpr unsigned PointerSize = 4; + static constexpr unsigned TrampolineSize = 20; + static constexpr unsigned StubSize = 8; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; + static constexpr unsigned ResolverCodeSize = 0xfc; - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); /// Write the requested number of trampolines into the given memory, /// which must be big enough to hold 1 pointer, plus NumTrampolines /// trampolines. - static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,unsigned NumTrampolines); + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, + unsigned NumTrampolines); /// Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,void *CallbackMgr, bool isBigEndian); - /// Emit at least MinStubs worth of indirect call stubs, rounded out to - /// the nearest page size. /// - /// E.g. Asking for 4 stubs on Mips32, where stubs are 8-bytes, with 4k - /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 - /// will return a block of 1024 (2-pages worth). - static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,unsigned MinStubs, void *InitialPtrVal); + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. 
+ static void writeResolverCode(char *ResolverBlockWorkingMem, + JITTargetAddress ResolverBlockTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr, + bool isBigEndian); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); }; - class OrcMips32Le : public OrcMips32_Base { public: - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,void *CallbackMgr) - { OrcMips32_Base::writeResolverCode(ResolveMem, Reentry, CallbackMgr, false); } + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { + OrcMips32_Base::writeResolverCode(ResolverWorkingMem, ResolverTargetAddress, + ReentryFnAddr, ReentryCtxAddr, false); + } }; class OrcMips32Be : public OrcMips32_Base { public: - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,void *CallbackMgr) - { OrcMips32_Base::writeResolverCode(ResolveMem, Reentry, CallbackMgr, true); } + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { + OrcMips32_Base::writeResolverCode(ResolverWorkingMem, ResolverTargetAddress, + ReentryFnAddr, ReentryCtxAddr, true); + } }; // @brief Mips64 support. @@ -284,31 +298,41 @@ class OrcMips32Be : public OrcMips32_Base { // Mips64 supports lazy JITing. 
class OrcMips64 { public: - static const unsigned PointerSize = 8; - static const unsigned TrampolineSize = 40; - static const unsigned ResolverCodeSize = 0x120; + static constexpr unsigned PointerSize = 8; + static constexpr unsigned TrampolineSize = 40; + static constexpr unsigned StubSize = 32; + static constexpr unsigned StubToPointerMaxDisplacement = 1 << 31; + static constexpr unsigned ResolverCodeSize = 0x120; - using IndirectStubsInfo = GenericIndirectStubsInfo<32>; - using JITReentryFn = JITTargetAddress (*)(void *CallbackMgr, - void *TrampolineId); /// Write the resolver code into the given memory. The user is /// responsible for allocating the memory and setting permissions. - static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,void *CallbackMgr); + /// + /// ReentryFnAddr should be the address of a function whose signature matches + /// void* (*)(void *TrampolineAddr, void *ReentryCtxAddr). The ReentryCtxAddr + /// argument of writeResolverCode will be passed as the second argument to + /// the function at ReentryFnAddr. + static void writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr); /// Write the requested number of trampolines into the given memory, /// which must be big enough to hold 1 pointer, plus NumTrampolines /// trampolines. - static void writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,unsigned NumTrampolines); - - /// Emit at least MinStubs worth of indirect call stubs, rounded out to - /// the nearest page size. - /// - /// E.g. Asking for 4 stubs on Mips64, where stubs are 8-bytes, with 4k - /// pages will return a block of 512 stubs (4096 / 8 = 512). Asking for 513 - /// will return a block of 1024 (2-pages worth). 
- static Error emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,unsigned MinStubs, void *InitialPtrVal); + static void writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverFnAddr, + unsigned NumTrampolines); + /// Write NumStubs indirect stubs to working memory at StubsBlockWorkingMem. + /// Stubs will be written as if linked at StubsBlockTargetAddress, with the + /// Nth stub using the Nth pointer in memory starting at + /// PointersBlockTargetAddress. + static void writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs); }; - } // end namespace orc - } // end namespace llvm +} // end namespace orc +} // end namespace llvm + #endif // LLVM_EXECUTIONENGINE_ORC_ORCABISUPPORT_H diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h index ac1df847cf7e2..50c155d77db17 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h @@ -15,6 +15,7 @@ #define LLVM_EXECUTIONENGINE_ORC_ORCREMOTETARGETSERVER_H #include "llvm/ExecutionEngine/JITSymbol.h" +#include "llvm/ExecutionEngine/Orc/IndirectionUtils.h" #include "llvm/ExecutionEngine/Orc/OrcError.h" #include "llvm/ExecutionEngine/Orc/OrcRemoteTargetRPCAPI.h" #include "llvm/Support/Debug.h" @@ -262,19 +263,17 @@ class OrcRemoteTargetServer return errorCodeToError( orcError(OrcErrorCode::RemoteIndirectStubsOwnerDoesNotExist)); - typename TargetT::IndirectStubsInfo IS; - if (auto Err = - TargetT::emitIndirectStubsBlock(IS, NumStubsRequired, nullptr)) - return std::move(Err); + auto IS = LocalIndirectStubsInfo::create( + NumStubsRequired, sys::Process::getPageSizeEstimate()); + if (!IS) + return IS.takeError(); - JITTargetAddress StubsBase = static_cast( - 
reinterpret_cast(IS.getStub(0))); - JITTargetAddress PtrsBase = static_cast( - reinterpret_cast(IS.getPtr(0))); - uint32_t NumStubsEmitted = IS.getNumStubs(); + JITTargetAddress StubsBase = pointerToJITTargetAddress(IS->getStub(0)); + JITTargetAddress PtrsBase = pointerToJITTargetAddress(IS->getPtr(0)); + uint32_t NumStubsEmitted = IS->getNumStubs(); auto &BlockList = StubOwnerItr->second; - BlockList.push_back(std::move(IS)); + BlockList.push_back(std::move(*IS)); return std::make_tuple(StubsBase, PtrsBase, NumStubsEmitted); } @@ -287,8 +286,10 @@ class OrcRemoteTargetServer if (EC) return errorCodeToError(EC); - TargetT::writeResolverCode(static_cast(ResolverBlock.base()), - &reenter, this); + TargetT::writeResolverCode(static_cast(ResolverBlock.base()), + pointerToJITTargetAddress(ResolverBlock.base()), + pointerToJITTargetAddress(&reenter), + pointerToJITTargetAddress(this)); return errorCodeToError(sys::Memory::protectMappedMemory( ResolverBlock.getMemoryBlock(), @@ -308,9 +309,10 @@ class OrcRemoteTargetServer (sys::Process::getPageSizeEstimate() - TargetT::PointerSize) / TargetT::TrampolineSize; - uint8_t *TrampolineMem = static_cast(TrampolineBlock.base()); - TargetT::writeTrampolines(TrampolineMem, ResolverBlock.base(), - NumTrampolines); + char *TrampolineMem = static_cast(TrampolineBlock.base()); + TargetT::writeTrampolines( + TrampolineMem, pointerToJITTargetAddress(TrampolineMem), + pointerToJITTargetAddress(ResolverBlock.base()), NumTrampolines); EC = sys::Memory::protectMappedMemory(TrampolineBlock.getMemoryBlock(), sys::Memory::MF_READ | @@ -318,10 +320,8 @@ class OrcRemoteTargetServer TrampolineBlocks.push_back(std::move(TrampolineBlock)); - auto TrampolineBaseAddr = static_cast( - reinterpret_cast(TrampolineMem)); - - return std::make_tuple(TrampolineBaseAddr, NumTrampolines); + return std::make_tuple(pointerToJITTargetAddress(TrampolineMem), + NumTrampolines); } Expected handleGetSymbolAddress(const std::string &Name) { @@ -337,7 +337,7 @@ class 
OrcRemoteTargetServer uint32_t PointerSize = TargetT::PointerSize; uint32_t PageSize = sys::Process::getPageSizeEstimate(); uint32_t TrampolineSize = TargetT::TrampolineSize; - uint32_t IndirectStubSize = TargetT::IndirectStubsInfo::StubSize; + uint32_t IndirectStubSize = TargetT::StubSize; LLVM_DEBUG(dbgs() << " Remote info:\n" << " triple = '" << ProcessTriple << "'\n" << " pointer size = " << PointerSize << "\n" @@ -433,7 +433,7 @@ class OrcRemoteTargetServer SymbolLookupFtor SymbolLookup; EHFrameRegistrationFtor EHFramesRegister, EHFramesDeregister; std::map Allocators; - using ISBlockOwnerList = std::vector; + using ISBlockOwnerList = std::vector>; std::map IndirectStubsOwners; sys::OwningMemoryBlock ResolverBlock; std::vector TrampolineBlocks; diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp index 8ed23de419d1e..5f89d91ef9948 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp @@ -7,13 +7,46 @@ //===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/Orc/OrcABISupport.h" +#include "llvm/Support/FormatVariadic.h" #include "llvm/Support/Process.h" +#include "llvm/Support/raw_ostream.h" + +#define DEBUG_TYPE "orc" + +using namespace llvm; + +template +bool stubAndPointerRangesOk(JITTargetAddress StubBlockAddr, + JITTargetAddress PointerBlockAddr, + unsigned NumStubs) { + constexpr unsigned MaxDisp = ORCABI::StubToPointerMaxDisplacement; + JITTargetAddress FirstStub = StubBlockAddr; + JITTargetAddress LastStub = FirstStub + ((NumStubs - 1) * ORCABI::StubSize); + JITTargetAddress FirstPointer = PointerBlockAddr; + JITTargetAddress LastPointer = + FirstPointer + ((NumStubs - 1) * ORCABI::StubSize); + + if (FirstStub < FirstPointer) { + if (LastStub >= FirstPointer) + return false; // Ranges overlap. 
+ return (FirstPointer - FirstStub <= MaxDisp) && + (LastPointer - LastStub <= MaxDisp); // out-of-range. + } + + if (LastPointer >= FirstStub) + return false; // Ranges overlap. + + return (FirstStub - FirstPointer <= MaxDisp) && + (LastStub - LastPointer <= MaxDisp); +} namespace llvm { namespace orc { -void OrcAArch64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, - void *CallbackMgr) { +void OrcAArch64::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { const uint32_t ResolverCode[] = { // resolver_entry: @@ -48,7 +81,7 @@ void OrcAArch64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, 0xadbf17e4, // 0x070: stp q4, q5, [sp, #-32]! 0xadbf0fe2, // 0x074: stp q2, q3, [sp, #-32]! 0xadbf07e0, // 0x078: stp q0, q1, [sp, #-32]! - 0x580004e0, // 0x07c: ldr x0, Lcallbackmgr + 0x580004e0, // 0x07c: ldr x0, Lreentry_ctx_ptr 0xaa1e03e1, // 0x080: mov x1, x30 0xd1003021, // 0x084: sub x1, x1, #12 0x58000442, // 0x088: ldr x2, Lreentry_fn_ptr @@ -87,43 +120,47 @@ void OrcAArch64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, 0xd65f0220, // 0x10c: ret x17 0x01234567, // 0x110: Lreentry_fn_ptr: 0xdeadbeef, // 0x114: .quad 0 - 0x98765432, // 0x118: Lcallbackmgr: + 0x98765432, // 0x118: Lreentry_ctx_ptr: 0xcafef00d // 0x11c: .quad 0 }; const unsigned ReentryFnAddrOffset = 0x110; - const unsigned CallbackMgrAddrOffset = 0x118; + const unsigned ReentryCtxAddrOffset = 0x118; - memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); - memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn)); - memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr, - sizeof(CallbackMgr)); + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, + sizeof(uint64_t)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, + 
sizeof(uint64_t)); } -void OrcAArch64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, +void OrcAArch64::writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8); - memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void *)); + memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr, + sizeof(uint64_t)); // OffsetToPtr is actually the offset from the PC for the 2nd instruction, so // subtract 32-bits. OffsetToPtr -= 4; - uint32_t *Trampolines = reinterpret_cast(TrampolineMem); + uint32_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) { Trampolines[3 * I + 0] = 0xaa1e03f1; // mov x17, x30 Trampolines[3 * I + 1] = 0x58000010 | (OffsetToPtr << 3); // adr x16, Lptr Trampolines[3 * I + 2] = 0xd63f0200; // blr x16 } - } -Error OrcAArch64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, - void *InitialPtrVal) { +void OrcAArch64::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { // Stub format is: // // .section __orc_stubs @@ -144,68 +181,41 @@ Error OrcAArch64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, // // ... - const unsigned StubSize = IndirectStubsInfo::StubSize; - - // Emit at least MinStubs, rounded up to fill the pages allocated. - static const unsigned PageSize = sys::Process::getPageSizeEstimate(); - unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; - unsigned NumStubs = (NumPages * PageSize) / StubSize; - - // Allocate memory for stubs and pointers in one call. 
- std::error_code EC; - auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - 2 * NumPages * PageSize, nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - - if (EC) - return errorCodeToError(EC); - - // Create separate MemoryBlocks representing the stubs and pointers. - sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); - sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + - NumPages * PageSize, - NumPages * PageSize); - - // Populate the stubs page stubs and mark it executable. - uint64_t *Stub = reinterpret_cast(StubsBlock.base()); - uint64_t PtrOffsetField = static_cast(NumPages * PageSize) - << 3; + static_assert(StubSize == PointerSize, + "Pointer and stub size must match for algorithm below"); + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); + uint64_t PtrDisplacement = + PointersBlockTargetAddress - StubsBlockTargetAddress; + uint64_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrOffsetField = PtrDisplacement << 3; for (unsigned I = 0; I < NumStubs; ++I) Stub[I] = 0xd61f020058000010 | PtrOffsetField; - - if (auto EC = sys::Memory::protectMappedMemory( - StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) - return errorCodeToError(EC); - - // Initialize all pointers to point at FailureAddress. 
- void **Ptr = reinterpret_cast(PtrsBlock.base()); - for (unsigned I = 0; I < NumStubs; ++I) - Ptr[I] = InitialPtrVal; - - StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem)); - - return Error::success(); } -void OrcX86_64_Base::writeTrampolines(uint8_t *TrampolineMem, - void *ResolverAddr, - unsigned NumTrampolines) { +void OrcX86_64_Base::writeTrampolines( + char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { unsigned OffsetToPtr = NumTrampolines * TrampolineSize; - memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void *)); + memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr, + sizeof(uint64_t)); - uint64_t *Trampolines = reinterpret_cast(TrampolineMem); + uint64_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); uint64_t CallIndirPCRel = 0xf1c40000000015ff; for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16); } -Error OrcX86_64_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, - void *InitialPtrVal) { +void OrcX86_64_Base::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { // Stub format is: // // .section __orc_stubs @@ -226,52 +236,28 @@ Error OrcX86_64_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, // // ... - const unsigned StubSize = IndirectStubsInfo::StubSize; - - // Emit at least MinStubs, rounded up to fill the pages allocated. - static const unsigned PageSize = sys::Process::getPageSizeEstimate(); - unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; - unsigned NumStubs = (NumPages * PageSize) / StubSize; - - // Allocate memory for stubs and pointers in one call. 
- std::error_code EC; - auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - 2 * NumPages * PageSize, nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - - if (EC) - return errorCodeToError(EC); - - // Create separate MemoryBlocks representing the stubs and pointers. - sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); - sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + - NumPages * PageSize, - NumPages * PageSize); - // Populate the stubs page stubs and mark it executable. - uint64_t *Stub = reinterpret_cast(StubsBlock.base()); - uint64_t PtrOffsetField = static_cast(NumPages * PageSize - 6) - << 16; + static_assert(StubSize == PointerSize, + "Pointer and stub size must match for algorithm below"); + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); + uint64_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrOffsetField = + (PointersBlockTargetAddress - StubsBlockTargetAddress - 6) << 16; for (unsigned I = 0; I < NumStubs; ++I) Stub[I] = 0xF1C40000000025ff | PtrOffsetField; - - if (auto EC = sys::Memory::protectMappedMemory( - StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) - return errorCodeToError(EC); - - // Initialize all pointers to point at FailureAddress. 
- void **Ptr = reinterpret_cast(PtrsBlock.base()); - for (unsigned I = 0; I < NumStubs; ++I) - Ptr[I] = InitialPtrVal; - - StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem)); - - return Error::success(); } -void OrcX86_64_SysV::writeResolverCode(uint8_t *ResolverMem, - JITReentryFn ReentryFn, - void *CallbackMgr) { +void OrcX86_64_SysV::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { + + LLVM_DEBUG({ + dbgs() << "Writing resolver code to " + << formatv("{0:x16}", ResolverTargetAddress) << "\n"; + }); const uint8_t ResolverCode[] = { // resolver_entry: @@ -295,7 +281,7 @@ void OrcX86_64_SysV::writeResolverCode(uint8_t *ResolverMem, 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp) 0x48, 0xbf, // 0x26: movabsq , %rdi - // 0x28: Callback manager addr. + // 0x28: JIT re-entry ctx addr. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, 0x75, 0x08, // 0x30: movq 8(%rbp), %rsi @@ -325,23 +311,26 @@ void OrcX86_64_SysV::writeResolverCode(uint8_t *ResolverMem, 0x58, // 0x69: popq %rax 0x5d, // 0x6a: popq %rbp 0xc3, // 0x6b: retq - }; + }; const unsigned ReentryFnAddrOffset = 0x3a; - const unsigned CallbackMgrAddrOffset = 0x28; + const unsigned ReentryCtxAddrOffset = 0x28; - memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); - memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn)); - memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr, - sizeof(CallbackMgr)); + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, + sizeof(uint64_t)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, + sizeof(uint64_t)); } -void OrcX86_64_Win32::writeResolverCode(uint8_t *ResolverMem, - JITReentryFn ReentryFn, - void *CallbackMgr) { +void OrcX86_64_Win32::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress 
ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { - // resolverCode is similar to OrcX86_64 with differences specific to windows x64 calling convention: - // arguments go into rcx, rdx and come in reverse order, shadow space allocation on stack + // resolverCode is similar to OrcX86_64 with differences specific to windows + // x64 calling convention: arguments go into rcx, rdx and come in reverse + // order, shadow space allocation on stack const uint8_t ResolverCode[] = { // resolver_entry: 0x55, // 0x00: pushq %rbp @@ -364,7 +353,7 @@ void OrcX86_64_Win32::writeResolverCode(uint8_t *ResolverMem, 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp) 0x48, 0xb9, // 0x26: movabsq , %rcx - // 0x28: Callback manager addr. + // 0x28: JIT re-entry ctx addr. 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8B, 0x55, 0x08, // 0x30: mov rdx, [rbp+0x8] @@ -402,18 +391,23 @@ void OrcX86_64_Win32::writeResolverCode(uint8_t *ResolverMem, 0xc3, // 0x73: retq }; - const unsigned ReentryFnAddrOffset = 0x3a; - const unsigned CallbackMgrAddrOffset = 0x28; + const unsigned ReentryCtxAddrOffset = 0x28; - memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); - memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn)); - memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr, - sizeof(CallbackMgr)); + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, + sizeof(uint64_t)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, + sizeof(uint64_t)); } -void OrcI386::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, - void *CallbackMgr) { +void OrcI386::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { + + assert((ReentryFnAddr >> 32) == 0 && "ReentryFnAddr out of range"); + assert((ReentryCtxAddr >> 32) == 
0 && "ReentryCtxAddr out of range"); const uint8_t ResolverCode[] = { // resolver_entry: @@ -451,29 +445,39 @@ void OrcI386::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, }; const unsigned ReentryFnAddrOffset = 0x2a; - const unsigned CallbackMgrAddrOffset = 0x25; + const unsigned ReentryCtxAddrOffset = 0x25; - memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); - memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn)); - memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr, - sizeof(CallbackMgr)); + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr, + sizeof(uint32_t)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr, + sizeof(uint32_t)); } -void OrcI386::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, +void OrcI386::writeTrampolines(char *TrampolineWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { + assert((ResolverAddr >> 32) == 0 && "ResolverAddr out of range"); uint64_t CallRelImm = 0xF1C4C400000000e8; - uint64_t Resolver = reinterpret_cast(ResolverAddr); uint64_t ResolverRel = - Resolver - reinterpret_cast(TrampolineMem) - 5; + ResolverAddr - reinterpret_cast(TrampolineBlockTargetAddress) - + 5; - uint64_t *Trampolines = reinterpret_cast(TrampolineMem); + uint64_t *Trampolines = reinterpret_cast(TrampolineWorkingMem); for (unsigned I = 0; I < NumTrampolines; ++I, ResolverRel -= TrampolineSize) Trampolines[I] = CallRelImm | (ResolverRel << 8); } -Error OrcI386::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, void *InitialPtrVal) { +void OrcI386::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { + assert((StubsBlockTargetAddress >> 32) == 0 && + "StubsBlockTargetAddress is out of range"); + 
assert((PointersBlockTargetAddress >> 32) == 0 && + "PointersBlockTargetAddress is out of range"); + // Stub format is: // // .section __orc_stubs @@ -494,51 +498,21 @@ Error OrcI386::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, // // ... - const unsigned StubSize = IndirectStubsInfo::StubSize; + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); - // Emit at least MinStubs, rounded up to fill the pages allocated. - static const unsigned PageSize = sys::Process::getPageSizeEstimate(); - unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; - unsigned NumStubs = (NumPages * PageSize) / StubSize; - - // Allocate memory for stubs and pointers in one call. - std::error_code EC; - auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - 2 * NumPages * PageSize, nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - - if (EC) - return errorCodeToError(EC); - - // Create separate MemoryBlocks representing the stubs and pointers. - sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); - sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + - NumPages * PageSize, - NumPages * PageSize); - - // Populate the stubs page stubs and mark it executable. - uint64_t *Stub = reinterpret_cast(StubsBlock.base()); - uint64_t PtrAddr = reinterpret_cast(PtrsBlock.base()); + uint64_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrAddr = PointersBlockTargetAddress; for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 4) Stub[I] = 0xF1C40000000025ff | (PtrAddr << 16); - - if (auto EC = sys::Memory::protectMappedMemory( - StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) - return errorCodeToError(EC); - - // Initialize all pointers to point at FailureAddress. 
- void **Ptr = reinterpret_cast(PtrsBlock.base()); - for (unsigned I = 0; I < NumStubs; ++I) - Ptr[I] = InitialPtrVal; - - StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem)); - - return Error::success(); } -void OrcMips32_Base::writeResolverCode(uint8_t *ResolverMem, - JITReentryFn ReentryFn, - void *CallbackMgr, bool isBigEndian) { +void OrcMips32_Base::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr, + bool isBigEndian) { const uint32_t ResolverCode[] = { // resolver_entry: @@ -570,9 +544,9 @@ void OrcMips32_Base::writeResolverCode(uint8_t *ResolverMem, 0xafbe0060, // 0x64: sw $fp,96($sp) 0xafbf0064, // 0x68: sw $ra,100($sp) - // Callback manager addr. - 0x00000000, // 0x6c: lui $a0,callbackmgr - 0x00000000, // 0x70: addiu $a0,$a0,callbackmgr + // JIT re-entry ctx addr. + 0x00000000, // 0x6c: lui $a0,ctx + 0x00000000, // 0x70: addiu $a0,$a0,ctx 0x03e02825, // 0x74: move $a1, $ra 0x24a5ffec, // 0x78: addiu $a1,$a1,-20 @@ -614,50 +588,63 @@ void OrcMips32_Base::writeResolverCode(uint8_t *ResolverMem, }; const unsigned ReentryFnAddrOffset = 0x7c; // JIT re-entry fn addr lui - const unsigned CallbackMgrAddrOffset = 0x6c; // Callback manager addr lui + const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry context addr lui const unsigned Offsett = 0xf8; - memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); // Depending on endian return value will be in v0 or v1. uint32_t MoveVxT9 = isBigEndian ? 
0x0060c825 : 0x0040c825; - memcpy(ResolverMem + Offsett, &MoveVxT9, sizeof(MoveVxT9)); - - uint64_t CallMgrAddr = reinterpret_cast(CallbackMgr); - uint32_t CallMgrLUi = 0x3c040000 | (((CallMgrAddr + 0x8000) >> 16) & 0xFFFF); - uint32_t CallMgrADDiu = 0x24840000 | ((CallMgrAddr) & 0xFFFF); - memcpy(ResolverMem + CallbackMgrAddrOffset, &CallMgrLUi, sizeof(CallMgrLUi)); - memcpy(ResolverMem + CallbackMgrAddrOffset + 4, &CallMgrADDiu, - sizeof(CallMgrADDiu)); - - uint64_t ReentryAddr = reinterpret_cast(ReentryFn); - uint32_t ReentryLUi = 0x3c190000 | (((ReentryAddr + 0x8000) >> 16) & 0xFFFF); - uint32_t ReentryADDiu = 0x27390000 | ((ReentryAddr) & 0xFFFF); - memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryLUi, sizeof(ReentryLUi)); - memcpy(ResolverMem + ReentryFnAddrOffset + 4, &ReentryADDiu, - sizeof(ReentryADDiu)); + memcpy(ResolverWorkingMem + Offsett, &MoveVxT9, sizeof(MoveVxT9)); + + uint32_t ReentryCtxLUi = + 0x3c040000 | (((ReentryCtxAddr + 0x8000) >> 16) & 0xFFFF); + uint32_t ReentryCtxADDiu = 0x24840000 | ((ReentryCtxAddr)&0xFFFF); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi, + sizeof(ReentryCtxLUi)); + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 4, &ReentryCtxADDiu, + sizeof(ReentryCtxADDiu)); + + uint32_t ReentryFnLUi = + 0x3c190000 | (((ReentryFnAddr + 0x8000) >> 16) & 0xFFFF); + uint32_t ReentryFnADDiu = 0x27390000 | ((ReentryFnAddr)&0xFFFF); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi, + sizeof(ReentryFnLUi)); + memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 4, &ReentryFnADDiu, + sizeof(ReentryFnADDiu)); } -void OrcMips32_Base::writeTrampolines(uint8_t *TrampolineMem, - void *ResolverAddr, - unsigned NumTrampolines) { +void OrcMips32_Base::writeTrampolines( + char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { - uint32_t *Trampolines = reinterpret_cast(TrampolineMem); - uint64_t ResolveAddr = 
reinterpret_cast(ResolverAddr); - uint32_t RHiAddr = ((ResolveAddr + 0x8000) >> 16); + assert((ResolverAddr >> 32) == 0 && "ResolverAddr out of range"); + + uint32_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); + uint32_t RHiAddr = ((ResolverAddr + 0x8000) >> 16); for (unsigned I = 0; I < NumTrampolines; ++I) { - Trampolines[5 * I + 0] = 0x03e0c025; // move $t8,$ra - Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF); // lui $t9,resolveAddr - Trampolines[5 * I + 2] = 0x27390000 | (ResolveAddr & 0xFFFF); // addiu $t9,$t9,resolveAddr - Trampolines[5 * I + 3] = 0x0320f809; // jalr $t9 - Trampolines[5 * I + 4] = 0x00000000; // nop + // move $t8,$ra + // lui $t9,ResolverAddr + // addiu $t9,$t9,ResolverAddr + // jalr $t9 + // nop + Trampolines[5 * I + 0] = 0x03e0c025; + Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF); + Trampolines[5 * I + 2] = 0x27390000 | (ResolverAddr & 0xFFFF); + Trampolines[5 * I + 3] = 0x0320f809; + Trampolines[5 * I + 4] = 0x00000000; } } -Error OrcMips32_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, - void *InitialPtrVal) { +void OrcMips32_Base::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { + assert((StubsBlockTargetAddress >> 32) == 0 && + "InitialPtrVal is out of range"); + // Stub format is: // // .section __orc_stubs @@ -678,33 +665,15 @@ Error OrcMips32_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, // ptr2: // .word 0x0 // - // ... - - const unsigned StubSize = IndirectStubsInfo::StubSize; - - // Emit at least MinStubs, rounded up to fill the pages allocated. - static const unsigned PageSize = sys::Process::getPageSizeEstimate(); - unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; - unsigned NumStubs = (NumPages * PageSize) / StubSize; + // i.. - // Allocate memory for stubs and pointers in one call. 
- std::error_code EC; - auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - 2 * NumPages * PageSize, nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - - if (EC) - return errorCodeToError(EC); - - // Create separate MemoryBlocks representing the stubs and pointers. - sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); - sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + - NumPages * PageSize, - NumPages * PageSize); + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); // Populate the stubs page stubs and mark it executable. - uint32_t *Stub = reinterpret_cast(StubsBlock.base()); - uint64_t PtrAddr = reinterpret_cast(Stub) + NumPages * PageSize; + uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrAddr = PointersBlockTargetAddress; for (unsigned I = 0; I < NumStubs; ++I) { uint32_t HiAddr = ((PtrAddr + 0x8000) >> 16); @@ -714,26 +683,15 @@ Error OrcMips32_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, Stub[4 * I + 3] = 0x00000000; // nop PtrAddr += 4; } - - if (auto EC = sys::Memory::protectMappedMemory( - StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) - return errorCodeToError(EC); - - // Initialize all pointers to point at FailureAddress. 
- void **Ptr = reinterpret_cast(PtrsBlock.base()); - for (unsigned I = 0; I < NumStubs; ++I) - Ptr[I] = InitialPtrVal; - - StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem)); - - return Error::success(); } -void OrcMips64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, - void *CallbackMgr) { +void OrcMips64::writeResolverCode(char *ResolverWorkingMem, + JITTargetAddress ResolverTargetAddress, + JITTargetAddress ReentryFnAddr, + JITTargetAddress ReentryCtxAddr) { const uint32_t ResolverCode[] = { - //resolver_entry: + //resolver_entry: 0x67bdff30, // 0x00: daddiu $sp,$sp,-208 0xffa20000, // 0x04: sd v0,0(sp) 0xffa30008, // 0x08: sd v1,8(sp) @@ -762,13 +720,13 @@ void OrcMips64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, 0xffbe00c0, // 0x64: sd fp,192(sp) 0xffbf00c8, // 0x68: sd ra,200(sp) - // Callback manager addr. - 0x00000000, // 0x6c: lui $a0,heighest(callbackmgr) - 0x00000000, // 0x70: daddiu $a0,$a0,heigher(callbackmgr) + // JIT re-entry ctx addr. 
+ 0x00000000, // 0x6c: lui $a0,heighest(ctx) + 0x00000000, // 0x70: daddiu $a0,$a0,heigher(ctx) 0x00000000, // 0x74: dsll $a0,$a0,16 - 0x00000000, // 0x78: daddiu $a0,$a0,hi(callbackmgr) + 0x00000000, // 0x78: daddiu $a0,$a0,hi(ctx) 0x00000000, // 0x7c: dsll $a0,$a0,16 - 0x00000000, // 0x80: daddiu $a0,$a0,lo(callbackmgr) + 0x00000000, // 0x80: daddiu $a0,$a0,lo(ctx) 0x03e02825, // 0x84: move $a1, $ra 0x64a5ffdc, // 0x88: daddiu $a1,$a1,-36 @@ -814,73 +772,73 @@ void OrcMips64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn, }; const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lui - const unsigned CallbackMgrAddrOffset = 0x6c; // Callback manager addr lui - - memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode)); - - uint64_t CallMgrAddr = reinterpret_cast(CallbackMgr); - - uint32_t CallMgrLUi = - 0x3c040000 | (((CallMgrAddr + 0x800080008000) >> 48) & 0xFFFF); - uint32_t CallMgrDADDiu = - 0x64840000 | (((CallMgrAddr + 0x80008000) >> 32) & 0xFFFF); - uint32_t CallMgrDSLL = 0x00042438; - uint32_t CallMgrDADDiu2 = - 0x64840000 | ((((CallMgrAddr + 0x8000) >> 16) & 0xFFFF)); - uint32_t CallMgrDSLL2 = 0x00042438; - uint32_t CallMgrDADDiu3 = 0x64840000 | ((CallMgrAddr)&0xFFFF); - - memcpy(ResolverMem + CallbackMgrAddrOffset, &CallMgrLUi, sizeof(CallMgrLUi)); - memcpy(ResolverMem + (CallbackMgrAddrOffset + 4), &CallMgrDADDiu, - sizeof(CallMgrDADDiu)); - memcpy(ResolverMem + (CallbackMgrAddrOffset + 8), &CallMgrDSLL, - sizeof(CallMgrDSLL)); - memcpy(ResolverMem + (CallbackMgrAddrOffset + 12), &CallMgrDADDiu2, - sizeof(CallMgrDADDiu2)); - memcpy(ResolverMem + (CallbackMgrAddrOffset + 16), &CallMgrDSLL2, - sizeof(CallMgrDSLL2)); - memcpy(ResolverMem + (CallbackMgrAddrOffset + 20), &CallMgrDADDiu3, - sizeof(CallMgrDADDiu3)); - - uint64_t ReentryAddr = reinterpret_cast(ReentryFn); - - uint32_t ReentryLUi = - 0x3c190000 | (((ReentryAddr + 0x800080008000) >> 48) & 0xFFFF); - - uint32_t ReentryDADDiu = - 0x67390000 | (((ReentryAddr + 0x80008000) 
>> 32) & 0xFFFF); - - uint32_t ReentryDSLL = 0x0019cc38; - - uint32_t ReentryDADDiu2 = - 0x67390000 | (((ReentryAddr + 0x8000) >> 16) & 0xFFFF); - - uint32_t ReentryDSLL2 = 0x0019cc38; - - uint32_t ReentryDADDiu3 = 0x67390000 | ((ReentryAddr)&0xFFFF); - - memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryLUi, sizeof(ReentryLUi)); - memcpy(ResolverMem + (ReentryFnAddrOffset + 4), &ReentryDADDiu, - sizeof(ReentryDADDiu)); - memcpy(ResolverMem + (ReentryFnAddrOffset + 8), &ReentryDSLL, - sizeof(ReentryDSLL)); - memcpy(ResolverMem + (ReentryFnAddrOffset + 12), &ReentryDADDiu2, - sizeof(ReentryDADDiu2)); - memcpy(ResolverMem + (ReentryFnAddrOffset + 16), &ReentryDSLL2, - sizeof(ReentryDSLL2)); - memcpy(ResolverMem + (ReentryFnAddrOffset + 20), &ReentryDADDiu3, - sizeof(ReentryDADDiu3)); + const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry ctx addr lui + + memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode)); + + uint32_t ReentryCtxLUi = + 0x3c040000 | (((ReentryCtxAddr + 0x800080008000) >> 48) & 0xFFFF); + uint32_t ReentryCtxDADDiu = + 0x64840000 | (((ReentryCtxAddr + 0x80008000) >> 32) & 0xFFFF); + uint32_t ReentryCtxDSLL = 0x00042438; + uint32_t ReentryCtxDADDiu2 = + 0x64840000 | ((((ReentryCtxAddr + 0x8000) >> 16) & 0xFFFF)); + uint32_t ReentryCtxDSLL2 = 0x00042438; + uint32_t ReentryCtxDADDiu3 = 0x64840000 | ((ReentryCtxAddr)&0xFFFF); + + memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi, + sizeof(ReentryCtxLUi)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 4), &ReentryCtxDADDiu, + sizeof(ReentryCtxDADDiu)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 8), &ReentryCtxDSLL, + sizeof(ReentryCtxDSLL)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 12), &ReentryCtxDADDiu2, + sizeof(ReentryCtxDADDiu2)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 16), &ReentryCtxDSLL2, + sizeof(ReentryCtxDSLL2)); + memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 20), &ReentryCtxDADDiu3, + 
sizeof(ReentryCtxDADDiu3)); + + uint32_t ReentryFnLUi = + 0x3c190000 | (((ReentryFnAddr + 0x800080008000) >> 48) & 0xFFFF); + + uint32_t ReentryFnDADDiu = + 0x67390000 | (((ReentryFnAddr + 0x80008000) >> 32) & 0xFFFF); + + uint32_t ReentryFnDSLL = 0x0019cc38; + + uint32_t ReentryFnDADDiu2 = + 0x67390000 | (((ReentryFnAddr + 0x8000) >> 16) & 0xFFFF); + + uint32_t ReentryFnDSLL2 = 0x0019cc38; + + uint32_t ReentryFnDADDiu3 = 0x67390000 | ((ReentryFnAddr)&0xFFFF); + + memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi, + sizeof(ReentryFnLUi)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 4), &ReentryFnDADDiu, + sizeof(ReentryFnDADDiu)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 8), &ReentryFnDSLL, + sizeof(ReentryFnDSLL)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 12), &ReentryFnDADDiu2, + sizeof(ReentryFnDADDiu2)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 16), &ReentryFnDSLL2, + sizeof(ReentryFnDSLL2)); + memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 20), &ReentryFnDADDiu3, + sizeof(ReentryFnDADDiu3)); } -void OrcMips64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, +void OrcMips64::writeTrampolines(char *TrampolineBlockWorkingMem, + JITTargetAddress TrampolineBlockTargetAddress, + JITTargetAddress ResolverAddr, unsigned NumTrampolines) { - uint32_t *Trampolines = reinterpret_cast(TrampolineMem); - uint64_t ResolveAddr = reinterpret_cast(ResolverAddr); + uint32_t *Trampolines = + reinterpret_cast(TrampolineBlockWorkingMem); - uint64_t HeighestAddr = ((ResolveAddr + 0x800080008000) >> 48); - uint64_t HeigherAddr = ((ResolveAddr + 0x80008000) >> 32); - uint64_t HiAddr = ((ResolveAddr + 0x8000) >> 16); + uint64_t HeighestAddr = ((ResolverAddr + 0x800080008000) >> 48); + uint64_t HeigherAddr = ((ResolverAddr + 0x80008000) >> 32); + uint64_t HiAddr = ((ResolverAddr + 0x8000) >> 16); for (unsigned I = 0; I < NumTrampolines; ++I) { Trampolines[10 * I + 0] = 0x03e0c025; // move $t8,$ra @@ -889,16 
+847,17 @@ void OrcMips64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr, Trampolines[10 * I + 3] = 0x0019cc38; // dsll $t9,$t9,16 Trampolines[10 * I + 4] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr) Trampolines[10 * I + 5] = 0x0019cc38; // dsll $t9,$t9,16 - Trampolines[10 * I + 6] = 0x67390000 | (ResolveAddr & 0xFFFF); // daddiu $t9,$t9,%lo(ptr) + Trampolines[10 * I + 6] = + 0x67390000 | (ResolverAddr & 0xFFFF); // daddiu $t9,$t9,%lo(ptr) Trampolines[10 * I + 7] = 0x0320f809; // jalr $t9 Trampolines[10 * I + 8] = 0x00000000; // nop Trampolines[10 * I + 9] = 0x00000000; // nop } } -Error OrcMips64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, - unsigned MinStubs, - void *InitialPtrVal) { +void OrcMips64::writeIndirectStubsBlock( + char *StubsBlockWorkingMem, JITTargetAddress StubsBlockTargetAddress, + JITTargetAddress PointersBlockTargetAddress, unsigned NumStubs) { // Stub format is: // // .section __orc_stubs @@ -926,31 +885,14 @@ Error OrcMips64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, // .dword 0x0 // // ... - const unsigned StubSize = IndirectStubsInfo::StubSize; - - // Emit at least MinStubs, rounded up to fill the pages allocated. - static const unsigned PageSize = sys::Process::getPageSizeEstimate(); - unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize; - unsigned NumStubs = (NumPages * PageSize) / StubSize; - // Allocate memory for stubs and pointers in one call. - std::error_code EC; - auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory( - 2 * NumPages * PageSize, nullptr, - sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC)); - - if (EC) - return errorCodeToError(EC); - - // Create separate MemoryBlocks representing the stubs and pointers. 
- sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize); - sys::MemoryBlock PtrsBlock(static_cast(StubsMem.base()) + - NumPages * PageSize, - NumPages * PageSize); + assert(stubAndPointerRangesOk( + StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) && + "PointersBlock is out of range"); // Populate the stubs page stubs and mark it executable. - uint32_t *Stub = reinterpret_cast(StubsBlock.base()); - uint64_t PtrAddr = reinterpret_cast(PtrsBlock.base()); + uint32_t *Stub = reinterpret_cast(StubsBlockWorkingMem); + uint64_t PtrAddr = PointersBlockTargetAddress; for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) { uint64_t HeighestAddr = ((PtrAddr + 0x800080008000) >> 48); @@ -965,19 +907,6 @@ Error OrcMips64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo, Stub[8 * I + 6] = 0x03200008; // jr $t9 Stub[8 * I + 7] = 0x00000000; // nop } - - if (auto EC = sys::Memory::protectMappedMemory( - StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC)) - return errorCodeToError(EC); - - // Initialize all pointers to point at FailureAddress. - void **Ptr = reinterpret_cast(PtrsBlock.base()); - for (unsigned I = 0; I < NumStubs; ++I) - Ptr[I] = InitialPtrVal; - - StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem)); - - return Error::success(); } } // End namespace orc. } // End namespace llvm. 
From 44899ed659ea55121059666841087f23c8880154 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 13 Jul 2020 17:19:05 +0000 Subject: [PATCH 121/771] [gn build] Port 83080a294ad --- llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn | 1 + llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index 11498ed602984..c13dc723ecd1b 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -55,6 +55,7 @@ static_library("Analysis") { "InlineAdvisor.cpp", "InlineCost.cpp", "InlineFeaturesAnalysis.cpp", + "InlineSizeEstimatorAnalysis.cpp", "InstCount.cpp", "InstructionPrecedenceTracking.cpp", "InstructionSimplify.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn index b0dcd497d844e..27733f63c2c50 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn @@ -25,6 +25,7 @@ unittest("AnalysisTests") { "GlobalsModRefTest.cpp", "IVDescriptorsTest.cpp", "InlineFeaturesAnalysisTest.cpp", + "InlineSizeEstimatorAnalysisTest.cpp", "LazyCallGraphTest.cpp", "LoadsTest.cpp", "LoopInfoTest.cpp", From fb558ccae743ed451ea42a30e197eb765a3184ac Mon Sep 17 00:00:00 2001 From: Hiroshi Yamauchi Date: Tue, 7 Jul 2020 11:01:35 -0700 Subject: [PATCH 122/771] [PGO][PGSO] Add profile guided size optimization to X86ISelDAGToDAG. 
Differential Revision: https://reviews.llvm.org/D83331 --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 10 +- llvm/test/CodeGen/X86/popcnt.ll | 446 ++++++++++++++++++++++++ llvm/test/CodeGen/X86/pr27202.ll | 30 ++ 3 files changed, 479 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index fb285376c5808..e91828bd17078 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -160,10 +160,6 @@ namespace { /// make the right decision when generating code for different targets. const X86Subtarget *Subtarget; - /// If true, selector should try to optimize for code size instead of - /// performance. - bool OptForSize; - /// If true, selector should try to optimize for minimum code size. bool OptForMinSize; @@ -172,7 +168,7 @@ namespace { public: explicit X86DAGToDAGISel(X86TargetMachine &tm, CodeGenOpt::Level OptLevel) - : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForSize(false), + : SelectionDAGISel(tm, OptLevel), Subtarget(nullptr), OptForMinSize(false), IndirectTlsSegRefs(false) {} StringRef getPassName() const override { @@ -186,7 +182,7 @@ namespace { "indirect-tls-seg-refs"); // OptFor[Min]Size are used in pattern predicates that isel is matching. - OptForSize = MF.getFunction().hasOptSize(); + bool OptForSize = MF.getFunction().hasOptSize(); OptForMinSize = MF.getFunction().hasMinSize(); assert((!OptForMinSize || OptForSize) && "OptForMinSize implies OptForSize"); @@ -4557,7 +4553,7 @@ void X86DAGToDAGISel::Select(SDNode *Node) { // the patterns on the add/sub/and/or/xor with immediate paterns in the // tablegen files to check immediate use count without making the patterns // unavailable to the fast-isel table. - if (!OptForSize) + if (!CurDAG->shouldOptForSize()) break; // Only handle i8/i16/i32/i64. 
diff --git a/llvm/test/CodeGen/X86/popcnt.ll b/llvm/test/CodeGen/X86/popcnt.ll index c68a3a5fe3246..cc6f3153d2ca1 100644 --- a/llvm/test/CodeGen/X86/popcnt.ll +++ b/llvm/test/CodeGen/X86/popcnt.ll @@ -1034,8 +1034,454 @@ define i128 @cnt128_optsize(i128 %x) nounwind readnone optsize { ret i128 %cnt } +define i32 @cnt32_pgso(i32 %x) nounwind readnone !prof !14 { +; X32-LABEL: cnt32_pgso: +; X32: # %bb.0: +; X32-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: shrl %ecx +; X32-NEXT: andl $1431655765, %ecx # imm = 0x55555555 +; X32-NEXT: subl %ecx, %eax +; X32-NEXT: movl $858993459, %ecx # imm = 0x33333333 +; X32-NEXT: movl %eax, %edx +; X32-NEXT: andl %ecx, %edx +; X32-NEXT: shrl $2, %eax +; X32-NEXT: andl %ecx, %eax +; X32-NEXT: addl %edx, %eax +; X32-NEXT: movl %eax, %ecx +; X32-NEXT: shrl $4, %ecx +; X32-NEXT: addl %eax, %ecx +; X32-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F +; X32-NEXT: imull $16843009, %ecx, %eax # imm = 0x1010101 +; X32-NEXT: shrl $24, %eax +; X32-NEXT: retl +; +; X64-LABEL: cnt32_pgso: +; X64: # %bb.0: +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl %eax +; X64-NEXT: andl $1431655765, %eax # imm = 0x55555555 +; X64-NEXT: subl %eax, %edi +; X64-NEXT: movl $858993459, %eax # imm = 0x33333333 +; X64-NEXT: movl %edi, %ecx +; X64-NEXT: andl %eax, %ecx +; X64-NEXT: shrl $2, %edi +; X64-NEXT: andl %eax, %edi +; X64-NEXT: addl %ecx, %edi +; X64-NEXT: movl %edi, %eax +; X64-NEXT: shrl $4, %eax +; X64-NEXT: addl %edi, %eax +; X64-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F +; X64-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 +; X64-NEXT: shrl $24, %eax +; X64-NEXT: retq +; +; X32-POPCNT-LABEL: cnt32_pgso: +; X32-POPCNT: # %bb.0: +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax +; X32-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: cnt32_pgso: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntl %edi, %eax +; X64-POPCNT-NEXT: retq + %cnt = tail call i32 @llvm.ctpop.i32(i32 %x) + ret i32 %cnt +} + +define i64 
@cnt64_pgso(i64 %x) nounwind readnone !prof !14 { +; X32-NOSSE-LABEL: cnt64_pgso: +; X32-NOSSE: # %bb.0: +; X32-NOSSE-NEXT: pushl %ebx +; X32-NOSSE-NEXT: pushl %edi +; X32-NOSSE-NEXT: pushl %esi +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X32-NOSSE-NEXT: movl %ecx, %edx +; X32-NOSSE-NEXT: shrl %edx +; X32-NOSSE-NEXT: movl $1431655765, %esi # imm = 0x55555555 +; X32-NOSSE-NEXT: andl %esi, %edx +; X32-NOSSE-NEXT: subl %edx, %ecx +; X32-NOSSE-NEXT: movl $858993459, %edx # imm = 0x33333333 +; X32-NOSSE-NEXT: movl %ecx, %edi +; X32-NOSSE-NEXT: andl %edx, %edi +; X32-NOSSE-NEXT: shrl $2, %ecx +; X32-NOSSE-NEXT: andl %edx, %ecx +; X32-NOSSE-NEXT: addl %edi, %ecx +; X32-NOSSE-NEXT: movl %ecx, %edi +; X32-NOSSE-NEXT: shrl $4, %edi +; X32-NOSSE-NEXT: addl %ecx, %edi +; X32-NOSSE-NEXT: movl $252645135, %ecx # imm = 0xF0F0F0F +; X32-NOSSE-NEXT: andl %ecx, %edi +; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %edi +; X32-NOSSE-NEXT: movl %eax, %ebx +; X32-NOSSE-NEXT: shrl %ebx +; X32-NOSSE-NEXT: andl %esi, %ebx +; X32-NOSSE-NEXT: subl %ebx, %eax +; X32-NOSSE-NEXT: movl %eax, %esi +; X32-NOSSE-NEXT: andl %edx, %esi +; X32-NOSSE-NEXT: shrl $2, %eax +; X32-NOSSE-NEXT: andl %edx, %eax +; X32-NOSSE-NEXT: addl %esi, %eax +; X32-NOSSE-NEXT: movl %eax, %edx +; X32-NOSSE-NEXT: shrl $4, %edx +; X32-NOSSE-NEXT: addl %eax, %edx +; X32-NOSSE-NEXT: andl %ecx, %edx +; X32-NOSSE-NEXT: imull $16843009, %edx, %eax # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %eax +; X32-NOSSE-NEXT: addl %edi, %eax +; X32-NOSSE-NEXT: xorl %edx, %edx +; X32-NOSSE-NEXT: popl %esi +; X32-NOSSE-NEXT: popl %edi +; X32-NOSSE-NEXT: popl %ebx +; X32-NOSSE-NEXT: retl +; +; X64-LABEL: cnt64_pgso: +; X64: # %bb.0: +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq %rax +; X64-NEXT: movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555 +; X64-NEXT: andq %rax, %rcx +; X64-NEXT: subq %rcx, %rdi +; X64-NEXT: movabsq 
$3689348814741910323, %rax # imm = 0x3333333333333333 +; X64-NEXT: movq %rdi, %rcx +; X64-NEXT: andq %rax, %rcx +; X64-NEXT: shrq $2, %rdi +; X64-NEXT: andq %rax, %rdi +; X64-NEXT: addq %rcx, %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $4, %rax +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F +; X64-NEXT: andq %rax, %rcx +; X64-NEXT: movabsq $72340172838076673, %rax # imm = 0x101010101010101 +; X64-NEXT: imulq %rcx, %rax +; X64-NEXT: shrq $56, %rax +; X64-NEXT: retq +; +; X32-POPCNT-LABEL: cnt64_pgso: +; X32-POPCNT: # %bb.0: +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %eax +; X32-POPCNT-NEXT: addl %ecx, %eax +; X32-POPCNT-NEXT: xorl %edx, %edx +; X32-POPCNT-NEXT: retl +; +; X64-POPCNT-LABEL: cnt64_pgso: +; X64-POPCNT: # %bb.0: +; X64-POPCNT-NEXT: popcntq %rdi, %rax +; X64-POPCNT-NEXT: retq +; +; X32-SSE2-LABEL: cnt64_pgso: +; X32-SSE2: # %bb.0: +; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE2-NEXT: psrlw $1, %xmm1 +; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1 +; X32-SSE2-NEXT: psubb %xmm1, %xmm0 +; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; X32-SSE2-NEXT: movdqa %xmm0, %xmm2 +; X32-SSE2-NEXT: pand %xmm1, %xmm2 +; X32-SSE2-NEXT: psrlw $2, %xmm0 +; X32-SSE2-NEXT: pand %xmm1, %xmm0 +; X32-SSE2-NEXT: paddb %xmm2, %xmm0 +; X32-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE2-NEXT: psrlw $4, %xmm1 +; X32-SSE2-NEXT: paddb %xmm0, %xmm1 +; X32-SSE2-NEXT: pand {{\.LCPI.*}}, %xmm1 +; X32-SSE2-NEXT: pxor %xmm0, %xmm0 +; X32-SSE2-NEXT: psadbw %xmm1, %xmm0 +; X32-SSE2-NEXT: movd %xmm0, %eax +; X32-SSE2-NEXT: xorl %edx, %edx +; X32-SSE2-NEXT: retl +; +; X32-SSSE3-LABEL: cnt64_pgso: +; X32-SSSE3: # %bb.0: +; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X32-SSSE3-NEXT: movdqa %xmm1, 
%xmm2 +; X32-SSSE3-NEXT: pand %xmm0, %xmm2 +; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4 +; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4 +; X32-SSSE3-NEXT: psrlw $4, %xmm1 +; X32-SSSE3-NEXT: pand %xmm0, %xmm1 +; X32-SSSE3-NEXT: pshufb %xmm1, %xmm3 +; X32-SSSE3-NEXT: paddb %xmm4, %xmm3 +; X32-SSSE3-NEXT: pxor %xmm0, %xmm0 +; X32-SSSE3-NEXT: psadbw %xmm3, %xmm0 +; X32-SSSE3-NEXT: movd %xmm0, %eax +; X32-SSSE3-NEXT: xorl %edx, %edx +; X32-SSSE3-NEXT: retl + %cnt = tail call i64 @llvm.ctpop.i64(i64 %x) + ret i64 %cnt +} + +define i128 @cnt128_pgso(i128 %x) nounwind readnone !prof !14 { +; X32-NOSSE-LABEL: cnt128_pgso: +; X32-NOSSE: # %bb.0: +; X32-NOSSE-NEXT: pushl %ebp +; X32-NOSSE-NEXT: pushl %ebx +; X32-NOSSE-NEXT: pushl %edi +; X32-NOSSE-NEXT: pushl %esi +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %edx +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %esi +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %ebx +; X32-NOSSE-NEXT: movl %ebx, %ecx +; X32-NOSSE-NEXT: shrl %ecx +; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 +; X32-NOSSE-NEXT: andl %edi, %ecx +; X32-NOSSE-NEXT: movl $1431655765, %edi # imm = 0x55555555 +; X32-NOSSE-NEXT: subl %ecx, %ebx +; X32-NOSSE-NEXT: movl $858993459, %ecx # imm = 0x33333333 +; X32-NOSSE-NEXT: movl %ebx, %ebp +; X32-NOSSE-NEXT: andl %ecx, %ebp +; X32-NOSSE-NEXT: shrl $2, %ebx +; X32-NOSSE-NEXT: andl %ecx, %ebx +; X32-NOSSE-NEXT: addl %ebp, %ebx +; X32-NOSSE-NEXT: movl %ebx, %ebp +; X32-NOSSE-NEXT: shrl $4, %ebp +; X32-NOSSE-NEXT: addl %ebx, %ebp +; X32-NOSSE-NEXT: movl %eax, %ebx +; X32-NOSSE-NEXT: shrl %ebx +; X32-NOSSE-NEXT: andl %edi, %ebx +; X32-NOSSE-NEXT: subl %ebx, %eax +; X32-NOSSE-NEXT: movl %eax, %ebx +; X32-NOSSE-NEXT: andl %ecx, %ebx +; X32-NOSSE-NEXT: shrl $2, %eax +; X32-NOSSE-NEXT: andl %ecx, %eax +; X32-NOSSE-NEXT: addl %ebx, %eax +; X32-NOSSE-NEXT: movl %eax, %edi +; X32-NOSSE-NEXT: shrl $4, %edi +; X32-NOSSE-NEXT: 
addl %eax, %edi +; X32-NOSSE-NEXT: movl $252645135, %ebx # imm = 0xF0F0F0F +; X32-NOSSE-NEXT: andl %ebx, %ebp +; X32-NOSSE-NEXT: imull $16843009, %ebp, %eax # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %eax +; X32-NOSSE-NEXT: andl %ebx, %edi +; X32-NOSSE-NEXT: imull $16843009, %edi, %edi # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %edi +; X32-NOSSE-NEXT: addl %eax, %edi +; X32-NOSSE-NEXT: movl %esi, %eax +; X32-NOSSE-NEXT: shrl %eax +; X32-NOSSE-NEXT: movl $1431655765, %ebp # imm = 0x55555555 +; X32-NOSSE-NEXT: andl %ebp, %eax +; X32-NOSSE-NEXT: subl %eax, %esi +; X32-NOSSE-NEXT: movl %esi, %eax +; X32-NOSSE-NEXT: andl %ecx, %eax +; X32-NOSSE-NEXT: shrl $2, %esi +; X32-NOSSE-NEXT: andl %ecx, %esi +; X32-NOSSE-NEXT: addl %eax, %esi +; X32-NOSSE-NEXT: movl %esi, %eax +; X32-NOSSE-NEXT: shrl $4, %eax +; X32-NOSSE-NEXT: addl %esi, %eax +; X32-NOSSE-NEXT: movl %edx, %esi +; X32-NOSSE-NEXT: shrl %esi +; X32-NOSSE-NEXT: andl %ebp, %esi +; X32-NOSSE-NEXT: subl %esi, %edx +; X32-NOSSE-NEXT: movl %edx, %esi +; X32-NOSSE-NEXT: andl %ecx, %esi +; X32-NOSSE-NEXT: shrl $2, %edx +; X32-NOSSE-NEXT: andl %ecx, %edx +; X32-NOSSE-NEXT: addl %esi, %edx +; X32-NOSSE-NEXT: movl %edx, %ecx +; X32-NOSSE-NEXT: shrl $4, %ecx +; X32-NOSSE-NEXT: addl %edx, %ecx +; X32-NOSSE-NEXT: andl %ebx, %eax +; X32-NOSSE-NEXT: andl %ebx, %ecx +; X32-NOSSE-NEXT: imull $16843009, %eax, %eax # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %eax +; X32-NOSSE-NEXT: imull $16843009, %ecx, %ecx # imm = 0x1010101 +; X32-NOSSE-NEXT: shrl $24, %ecx +; X32-NOSSE-NEXT: addl %eax, %ecx +; X32-NOSSE-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-NOSSE-NEXT: addl %edi, %ecx +; X32-NOSSE-NEXT: xorl %edx, %edx +; X32-NOSSE-NEXT: movl %edx, 12(%eax) +; X32-NOSSE-NEXT: movl %edx, 8(%eax) +; X32-NOSSE-NEXT: movl %edx, 4(%eax) +; X32-NOSSE-NEXT: movl %ecx, (%eax) +; X32-NOSSE-NEXT: popl %esi +; X32-NOSSE-NEXT: popl %edi +; X32-NOSSE-NEXT: popl %ebx +; X32-NOSSE-NEXT: popl %ebp +; X32-NOSSE-NEXT: retl $4 +; +; X64-LABEL: 
cnt128_pgso: +; X64: # %bb.0: +; X64-NEXT: movq %rsi, %rax +; X64-NEXT: shrq %rax +; X64-NEXT: movabsq $6148914691236517205, %r8 # imm = 0x5555555555555555 +; X64-NEXT: andq %r8, %rax +; X64-NEXT: subq %rax, %rsi +; X64-NEXT: movabsq $3689348814741910323, %rax # imm = 0x3333333333333333 +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: andq %rax, %rcx +; X64-NEXT: shrq $2, %rsi +; X64-NEXT: andq %rax, %rsi +; X64-NEXT: addq %rcx, %rsi +; X64-NEXT: movq %rsi, %rcx +; X64-NEXT: shrq $4, %rcx +; X64-NEXT: addq %rsi, %rcx +; X64-NEXT: movabsq $1085102592571150095, %r9 # imm = 0xF0F0F0F0F0F0F0F +; X64-NEXT: andq %r9, %rcx +; X64-NEXT: movabsq $72340172838076673, %rdx # imm = 0x101010101010101 +; X64-NEXT: imulq %rdx, %rcx +; X64-NEXT: shrq $56, %rcx +; X64-NEXT: movq %rdi, %rsi +; X64-NEXT: shrq %rsi +; X64-NEXT: andq %r8, %rsi +; X64-NEXT: subq %rsi, %rdi +; X64-NEXT: movq %rdi, %rsi +; X64-NEXT: andq %rax, %rsi +; X64-NEXT: shrq $2, %rdi +; X64-NEXT: andq %rax, %rdi +; X64-NEXT: addq %rsi, %rdi +; X64-NEXT: movq %rdi, %rax +; X64-NEXT: shrq $4, %rax +; X64-NEXT: addq %rdi, %rax +; X64-NEXT: andq %r9, %rax +; X64-NEXT: imulq %rdx, %rax +; X64-NEXT: shrq $56, %rax +; X64-NEXT: addq %rcx, %rax +; X64-NEXT: xorl %edx, %edx +; X64-NEXT: retq +; +; X32-POPCNT-LABEL: cnt128_pgso: +; X32-POPCNT: # %bb.0: +; X32-POPCNT-NEXT: pushl %esi +; X32-POPCNT-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %edx +; X32-POPCNT-NEXT: addl %ecx, %edx +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %ecx +; X32-POPCNT-NEXT: popcntl {{[0-9]+}}(%esp), %esi +; X32-POPCNT-NEXT: addl %ecx, %esi +; X32-POPCNT-NEXT: addl %edx, %esi +; X32-POPCNT-NEXT: xorl %ecx, %ecx +; X32-POPCNT-NEXT: movl %ecx, 12(%eax) +; X32-POPCNT-NEXT: movl %ecx, 8(%eax) +; X32-POPCNT-NEXT: movl %ecx, 4(%eax) +; X32-POPCNT-NEXT: movl %esi, (%eax) +; X32-POPCNT-NEXT: popl %esi +; X32-POPCNT-NEXT: retl $4 +; +; X64-POPCNT-LABEL: cnt128_pgso: +; X64-POPCNT: 
# %bb.0: +; X64-POPCNT-NEXT: popcntq %rsi, %rcx +; X64-POPCNT-NEXT: popcntq %rdi, %rax +; X64-POPCNT-NEXT: addq %rcx, %rax +; X64-POPCNT-NEXT: xorl %edx, %edx +; X64-POPCNT-NEXT: retq +; +; X32-SSE2-LABEL: cnt128_pgso: +; X32-SSE2: # %bb.0: +; X32-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero +; X32-SSE2-NEXT: movdqa %xmm0, %xmm1 +; X32-SSE2-NEXT: psrlw $1, %xmm1 +; X32-SSE2-NEXT: movdqa {{.*#+}} xmm2 = [85,85,85,85,85,85,85,85,85,85,85,85,85,85,85,85] +; X32-SSE2-NEXT: pand %xmm2, %xmm1 +; X32-SSE2-NEXT: psubb %xmm1, %xmm0 +; X32-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [51,51,51,51,51,51,51,51,51,51,51,51,51,51,51,51] +; X32-SSE2-NEXT: movdqa %xmm0, %xmm3 +; X32-SSE2-NEXT: pand %xmm1, %xmm3 +; X32-SSE2-NEXT: psrlw $2, %xmm0 +; X32-SSE2-NEXT: pand %xmm1, %xmm0 +; X32-SSE2-NEXT: paddb %xmm3, %xmm0 +; X32-SSE2-NEXT: movdqa %xmm0, %xmm3 +; X32-SSE2-NEXT: psrlw $4, %xmm3 +; X32-SSE2-NEXT: paddb %xmm0, %xmm3 +; X32-SSE2-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; X32-SSE2-NEXT: pand %xmm0, %xmm3 +; X32-SSE2-NEXT: pxor %xmm4, %xmm4 +; X32-SSE2-NEXT: psadbw %xmm4, %xmm3 +; X32-SSE2-NEXT: movd %xmm3, %ecx +; X32-SSE2-NEXT: movq {{.*#+}} xmm3 = mem[0],zero +; X32-SSE2-NEXT: movdqa %xmm3, %xmm5 +; X32-SSE2-NEXT: psrlw $1, %xmm5 +; X32-SSE2-NEXT: pand %xmm2, %xmm5 +; X32-SSE2-NEXT: psubb %xmm5, %xmm3 +; X32-SSE2-NEXT: movdqa %xmm3, %xmm2 +; X32-SSE2-NEXT: pand %xmm1, %xmm2 +; X32-SSE2-NEXT: psrlw $2, %xmm3 +; X32-SSE2-NEXT: pand %xmm1, %xmm3 +; X32-SSE2-NEXT: paddb %xmm2, %xmm3 +; X32-SSE2-NEXT: movdqa %xmm3, %xmm1 +; X32-SSE2-NEXT: psrlw $4, %xmm1 +; X32-SSE2-NEXT: paddb %xmm3, %xmm1 +; X32-SSE2-NEXT: pand %xmm0, %xmm1 +; X32-SSE2-NEXT: psadbw %xmm4, %xmm1 +; X32-SSE2-NEXT: movd %xmm1, %edx +; X32-SSE2-NEXT: addl %ecx, %edx +; X32-SSE2-NEXT: xorl %ecx, %ecx +; X32-SSE2-NEXT: movl %ecx, 12(%eax) +; X32-SSE2-NEXT: movl %ecx, 8(%eax) +; X32-SSE2-NEXT: movl %ecx, 4(%eax) +; X32-SSE2-NEXT: movl %edx, 
(%eax) +; X32-SSE2-NEXT: retl $4 +; +; X32-SSSE3-LABEL: cnt128_pgso: +; X32-SSSE3: # %bb.0: +; X32-SSSE3-NEXT: movl {{[0-9]+}}(%esp), %eax +; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] +; X32-SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero +; X32-SSSE3-NEXT: movdqa %xmm1, %xmm2 +; X32-SSSE3-NEXT: pand %xmm0, %xmm2 +; X32-SSSE3-NEXT: movdqa {{.*#+}} xmm3 = [0,1,1,2,1,2,2,3,1,2,2,3,2,3,3,4] +; X32-SSSE3-NEXT: movdqa %xmm3, %xmm4 +; X32-SSSE3-NEXT: pshufb %xmm2, %xmm4 +; X32-SSSE3-NEXT: psrlw $4, %xmm1 +; X32-SSSE3-NEXT: pand %xmm0, %xmm1 +; X32-SSSE3-NEXT: movdqa %xmm3, %xmm2 +; X32-SSSE3-NEXT: pshufb %xmm1, %xmm2 +; X32-SSSE3-NEXT: paddb %xmm4, %xmm2 +; X32-SSSE3-NEXT: pxor %xmm1, %xmm1 +; X32-SSSE3-NEXT: psadbw %xmm1, %xmm2 +; X32-SSSE3-NEXT: movd %xmm2, %ecx +; X32-SSSE3-NEXT: movq {{.*#+}} xmm2 = mem[0],zero +; X32-SSSE3-NEXT: movdqa %xmm2, %xmm4 +; X32-SSSE3-NEXT: pand %xmm0, %xmm4 +; X32-SSSE3-NEXT: movdqa %xmm3, %xmm5 +; X32-SSSE3-NEXT: pshufb %xmm4, %xmm5 +; X32-SSSE3-NEXT: psrlw $4, %xmm2 +; X32-SSSE3-NEXT: pand %xmm0, %xmm2 +; X32-SSSE3-NEXT: pshufb %xmm2, %xmm3 +; X32-SSSE3-NEXT: paddb %xmm5, %xmm3 +; X32-SSSE3-NEXT: psadbw %xmm1, %xmm3 +; X32-SSSE3-NEXT: movd %xmm3, %edx +; X32-SSSE3-NEXT: addl %ecx, %edx +; X32-SSSE3-NEXT: xorl %ecx, %ecx +; X32-SSSE3-NEXT: movl %ecx, 12(%eax) +; X32-SSSE3-NEXT: movl %ecx, 8(%eax) +; X32-SSSE3-NEXT: movl %ecx, 4(%eax) +; X32-SSSE3-NEXT: movl %edx, (%eax) +; X32-SSSE3-NEXT: retl $4 + %cnt = tail call i128 @llvm.ctpop.i128(i128 %x) + ret i128 %cnt +} + declare i8 @llvm.ctpop.i8(i8) nounwind readnone declare i16 @llvm.ctpop.i16(i16) nounwind readnone declare i32 @llvm.ctpop.i32(i32) nounwind readnone declare i64 @llvm.ctpop.i64(i64) nounwind readnone declare i128 @llvm.ctpop.i128(i128) nounwind readnone + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} 
+!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} diff --git a/llvm/test/CodeGen/X86/pr27202.ll b/llvm/test/CodeGen/X86/pr27202.ll index bb6be1d1685da..f3b319ead5982 100644 --- a/llvm/test/CodeGen/X86/pr27202.ll +++ b/llvm/test/CodeGen/X86/pr27202.ll @@ -14,6 +14,19 @@ define i1 @foo(i32 %i) optsize { ret i1 %cmp } +define i1 @foo_pgso(i32 %i) !prof !14 { +; CHECK-LABEL: foo_pgso: +; CHECK: # %bb.0: +; CHECK-NEXT: movl $305419896, %eax # imm = 0x12345678 +; CHECK-NEXT: andl %eax, %edi +; CHECK-NEXT: cmpl %eax, %edi +; CHECK-NEXT: sete %al +; CHECK-NEXT: retq + %and = and i32 %i, 305419896 + %cmp = icmp eq i32 %and, 305419896 + ret i1 %cmp +} + ; 8-bit ALU immediates probably have small encodings. ; We do not want to hoist the constant into a register here. 
@@ -52,3 +65,20 @@ define i64 @PR46237(i64 %x, i64 %y, i64 %z) optsize { %or4 = or i64 %or, %shl ret i64 %or4 } + +!llvm.module.flags = !{!0} +!0 = !{i32 1, !"ProfileSummary", !1} +!1 = !{!2, !3, !4, !5, !6, !7, !8, !9} +!2 = !{!"ProfileFormat", !"InstrProf"} +!3 = !{!"TotalCount", i64 10000} +!4 = !{!"MaxCount", i64 10} +!5 = !{!"MaxInternalCount", i64 1} +!6 = !{!"MaxFunctionCount", i64 1000} +!7 = !{!"NumCounts", i64 3} +!8 = !{!"NumFunctions", i64 3} +!9 = !{!"DetailedSummary", !10} +!10 = !{!11, !12, !13} +!11 = !{i32 10000, i64 100, i32 1} +!12 = !{i32 999000, i64 100, i32 1} +!13 = !{i32 999999, i64 1, i32 2} +!14 = !{!"function_entry_count", i64 0} From 2d3b8cc83fe85e5edcc607a0696d1e9f42ede246 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Mon, 13 Jul 2020 11:53:48 -0400 Subject: [PATCH 123/771] [libc++] Implement P0551 Make sure we satisfy the requirements added by P0551, and add tests to enforce that. --- libcxx/include/ios | 48 ++--- libcxx/include/ostream | 6 +- .../namespace/addressable_functions.sh.cpp | 185 ++++++++++++++++++ libcxx/www/cxx2a_status.html | 2 +- 4 files changed, 213 insertions(+), 28 deletions(-) create mode 100644 libcxx/test/std/namespace/addressable_functions.sh.cpp diff --git a/libcxx/include/ios b/libcxx/include/ios index d6967edbccdb7..7f0e2d65e6406 100644 --- a/libcxx/include/ios +++ b/libcxx/include/ios @@ -843,7 +843,7 @@ basic_ios<_CharT, _Traits>::set_rdbuf(basic_streambuf* _ ios_base::set_rdbuf(__sb); } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& boolalpha(ios_base& __str) { @@ -851,7 +851,7 @@ boolalpha(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& noboolalpha(ios_base& __str) { @@ -859,7 +859,7 @@ noboolalpha(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& showbase(ios_base& __str) { @@ -867,7 +867,7 @@ showbase(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& noshowbase(ios_base& __str) { @@ -875,7 
+875,7 @@ noshowbase(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& showpoint(ios_base& __str) { @@ -883,7 +883,7 @@ showpoint(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& noshowpoint(ios_base& __str) { @@ -891,7 +891,7 @@ noshowpoint(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& showpos(ios_base& __str) { @@ -899,7 +899,7 @@ showpos(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& noshowpos(ios_base& __str) { @@ -907,7 +907,7 @@ noshowpos(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& skipws(ios_base& __str) { @@ -915,7 +915,7 @@ skipws(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& noskipws(ios_base& __str) { @@ -923,7 +923,7 @@ noskipws(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& uppercase(ios_base& __str) { @@ -931,7 +931,7 @@ uppercase(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& nouppercase(ios_base& __str) { @@ -939,7 +939,7 @@ nouppercase(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& unitbuf(ios_base& __str) { @@ -947,7 +947,7 @@ unitbuf(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& nounitbuf(ios_base& __str) { @@ -955,7 +955,7 @@ nounitbuf(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& internal(ios_base& __str) { @@ -963,7 +963,7 @@ internal(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& left(ios_base& __str) { @@ -971,7 +971,7 @@ left(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& right(ios_base& __str) { @@ -979,7 +979,7 @@ right(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& dec(ios_base& __str) { @@ -987,7 +987,7 @@ 
dec(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& hex(ios_base& __str) { @@ -995,7 +995,7 @@ hex(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& oct(ios_base& __str) { @@ -1003,7 +1003,7 @@ oct(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& fixed(ios_base& __str) { @@ -1011,7 +1011,7 @@ fixed(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& scientific(ios_base& __str) { @@ -1019,7 +1019,7 @@ scientific(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& hexfloat(ios_base& __str) { @@ -1027,7 +1027,7 @@ hexfloat(ios_base& __str) return __str; } -inline _LIBCPP_INLINE_VISIBILITY +inline ios_base& defaultfloat(ios_base& __str) { diff --git a/libcxx/include/ostream b/libcxx/include/ostream index ea3870532f329..697732d54e6d8 100644 --- a/libcxx/include/ostream +++ b/libcxx/include/ostream @@ -999,7 +999,7 @@ basic_ostream<_CharT, _Traits>::seekp(off_type __off, ios_base::seekdir __dir) } template -inline _LIBCPP_INLINE_VISIBILITY +inline basic_ostream<_CharT, _Traits>& endl(basic_ostream<_CharT, _Traits>& __os) { @@ -1009,7 +1009,7 @@ endl(basic_ostream<_CharT, _Traits>& __os) } template -inline _LIBCPP_INLINE_VISIBILITY +inline basic_ostream<_CharT, _Traits>& ends(basic_ostream<_CharT, _Traits>& __os) { @@ -1018,7 +1018,7 @@ ends(basic_ostream<_CharT, _Traits>& __os) } template -inline _LIBCPP_INLINE_VISIBILITY +inline basic_ostream<_CharT, _Traits>& flush(basic_ostream<_CharT, _Traits>& __os) { diff --git a/libcxx/test/std/namespace/addressable_functions.sh.cpp b/libcxx/test/std/namespace/addressable_functions.sh.cpp new file mode 100644 index 0000000000000..fb731abf306ca --- /dev/null +++ b/libcxx/test/std/namespace/addressable_functions.sh.cpp @@ -0,0 +1,185 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the 
Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// Make sure functions specified as being 'addressable' (their address can be +// taken in a well-defined manner) are indeed addressable. This notion was +// added by http://wg21.link/p0551. While it was technically only introduced +// in C++20, we test it in all standard modes because it's basic QOI to provide +// a consistent behavior for that across standard modes. + +// RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.tu1.o -DTU1 +// RUN: %{cxx} %{flags} %{compile_flags} -c %s -o %t.tu2.o -DTU2 +// RUN: %{cxx} %{flags} %{link_flags} %t.tu1.o %t.tu2.o -o %t.exe +// RUN: %{exec} %t.exe + +#include +#include +#include +#include +#include + + +typedef std::ios_base& (FormatFlagFunction)(std::ios_base&); +typedef std::basic_ostream& (OstreamManipFunction)(std::basic_ostream&); +typedef std::basic_ostream& (WOstreamManipFunction)(std::basic_ostream&); +typedef std::basic_istream& (IstreamManipFunction)(std::basic_istream&); +typedef std::basic_istream& (WIstreamManipFunction)(std::basic_istream&); + +extern FormatFlagFunction* get_formatflag_tu1(std::string); +extern FormatFlagFunction* get_formatflag_tu2(std::string); + +extern OstreamManipFunction* get_ostreammanip_tu1(std::string); +extern OstreamManipFunction* get_ostreammanip_tu2(std::string); +extern WOstreamManipFunction* get_wostreammanip_tu1(std::string); +extern WOstreamManipFunction* get_wostreammanip_tu2(std::string); + +extern IstreamManipFunction* get_istreammanip_tu1(std::string); +extern IstreamManipFunction* get_istreammanip_tu2(std::string); +extern WIstreamManipFunction* get_wistreammanip_tu1(std::string); +extern WIstreamManipFunction* get_wistreammanip_tu2(std::string); + +#ifdef TU1 +FormatFlagFunction* get_formatflag_tu1(std::string func) +#else 
+FormatFlagFunction* get_formatflag_tu2(std::string func) +#endif +{ + std::map all_funcs; + + // [fmtflags.manip] + all_funcs.insert(std::make_pair("boolalpha", &std::boolalpha)); + all_funcs.insert(std::make_pair("noboolalpha", &std::noboolalpha)); + all_funcs.insert(std::make_pair("showbase", &std::showbase)); + all_funcs.insert(std::make_pair("noshowbase", &std::noshowbase)); + all_funcs.insert(std::make_pair("showpoint", &std::showpoint)); + all_funcs.insert(std::make_pair("noshowpoint", &std::noshowpoint)); + all_funcs.insert(std::make_pair("showpos", &std::showpos)); + all_funcs.insert(std::make_pair("noshowpos", &std::noshowpos)); + all_funcs.insert(std::make_pair("skipws", &std::skipws)); + all_funcs.insert(std::make_pair("noskipws", &std::noskipws)); + all_funcs.insert(std::make_pair("uppercase", &std::uppercase)); + all_funcs.insert(std::make_pair("nouppercase", &std::nouppercase)); + all_funcs.insert(std::make_pair("unitbuf", &std::unitbuf)); + all_funcs.insert(std::make_pair("nounitbuf", &std::nounitbuf)); + + // [adjustfield.manip] + all_funcs.insert(std::make_pair("internal", &std::internal)); + all_funcs.insert(std::make_pair("left", &std::left)); + all_funcs.insert(std::make_pair("right", &std::right)); + + // [basefield.manip] + all_funcs.insert(std::make_pair("dec", &std::dec)); + all_funcs.insert(std::make_pair("hex", &std::hex)); + all_funcs.insert(std::make_pair("oct", &std::oct)); + + // [floatfield.manip] + all_funcs.insert(std::make_pair("fixed", &std::fixed)); + all_funcs.insert(std::make_pair("scientific", &std::scientific)); + all_funcs.insert(std::make_pair("hexfloat", &std::hexfloat)); + all_funcs.insert(std::make_pair("defaultfloat", &std::defaultfloat)); + + return all_funcs.at(func); +} + +// [ostream.manip] (char) +#ifdef TU1 +OstreamManipFunction* get_ostreammanip_tu1(std::string func) +#else +OstreamManipFunction* get_ostreammanip_tu2(std::string func) +#endif +{ + std::map all_funcs; + typedef std::char_traits Traits; + 
all_funcs.insert(std::make_pair("endl", &std::endl)); + all_funcs.insert(std::make_pair("ends", &std::ends)); + all_funcs.insert(std::make_pair("flush", &std::flush)); + return all_funcs.at(func); +} + +// [ostream.manip] (wchar_t) +#ifdef TU1 +WOstreamManipFunction* get_wostreammanip_tu1(std::string func) +#else +WOstreamManipFunction* get_wostreammanip_tu2(std::string func) +#endif +{ + std::map all_funcs; + typedef std::char_traits Traits; + all_funcs.insert(std::make_pair("endl", &std::endl)); + all_funcs.insert(std::make_pair("ends", &std::ends)); + all_funcs.insert(std::make_pair("flush", &std::flush)); + return all_funcs.at(func); +} + +// [istream.manip] (char) +#ifdef TU1 +IstreamManipFunction* get_istreammanip_tu1(std::string func) +#else +IstreamManipFunction* get_istreammanip_tu2(std::string func) +#endif +{ + std::map all_funcs; + typedef std::char_traits Traits; + all_funcs.insert(std::make_pair("ws", &std::ws)); + return all_funcs.at(func); +} + +// [istream.manip] (wchar_t) +#ifdef TU1 +WIstreamManipFunction* get_wistreammanip_tu1(std::string func) +#else +WIstreamManipFunction* get_wistreammanip_tu2(std::string func) +#endif +{ + std::map all_funcs; + typedef std::char_traits Traits; + all_funcs.insert(std::make_pair("ws", &std::ws)); + return all_funcs.at(func); +} + + +#ifdef TU2 + int main() { + assert(get_formatflag_tu1("boolalpha") == get_formatflag_tu2("boolalpha")); + assert(get_formatflag_tu1("noboolalpha") == get_formatflag_tu2("noboolalpha")); + assert(get_formatflag_tu1("showbase") == get_formatflag_tu2("showbase")); + assert(get_formatflag_tu1("noshowbase") == get_formatflag_tu2("noshowbase")); + assert(get_formatflag_tu1("showpoint") == get_formatflag_tu2("showpoint")); + assert(get_formatflag_tu1("noshowpoint") == get_formatflag_tu2("noshowpoint")); + assert(get_formatflag_tu1("showpos") == get_formatflag_tu2("showpos")); + assert(get_formatflag_tu1("noshowpos") == get_formatflag_tu2("noshowpos")); + 
assert(get_formatflag_tu1("skipws") == get_formatflag_tu2("skipws")); + assert(get_formatflag_tu1("noskipws") == get_formatflag_tu2("noskipws")); + assert(get_formatflag_tu1("uppercase") == get_formatflag_tu2("uppercase")); + assert(get_formatflag_tu1("nouppercase") == get_formatflag_tu2("nouppercase")); + assert(get_formatflag_tu1("unitbuf") == get_formatflag_tu2("unitbuf")); + assert(get_formatflag_tu1("nounitbuf") == get_formatflag_tu2("nounitbuf")); + assert(get_formatflag_tu1("internal") == get_formatflag_tu2("internal")); + assert(get_formatflag_tu1("left") == get_formatflag_tu2("left")); + assert(get_formatflag_tu1("right") == get_formatflag_tu2("right")); + assert(get_formatflag_tu1("dec") == get_formatflag_tu2("dec")); + assert(get_formatflag_tu1("hex") == get_formatflag_tu2("hex")); + assert(get_formatflag_tu1("oct") == get_formatflag_tu2("oct")); + assert(get_formatflag_tu1("fixed") == get_formatflag_tu2("fixed")); + assert(get_formatflag_tu1("scientific") == get_formatflag_tu2("scientific")); + assert(get_formatflag_tu1("hexfloat") == get_formatflag_tu2("hexfloat")); + assert(get_formatflag_tu1("defaultfloat") == get_formatflag_tu2("defaultfloat")); + + assert(get_ostreammanip_tu1("endl") == get_ostreammanip_tu2("endl")); + assert(get_ostreammanip_tu1("ends") == get_ostreammanip_tu2("ends")); + assert(get_ostreammanip_tu1("flush") == get_ostreammanip_tu2("flush")); + + assert(get_wostreammanip_tu1("endl") == get_wostreammanip_tu2("endl")); + assert(get_wostreammanip_tu1("ends") == get_wostreammanip_tu2("ends")); + assert(get_wostreammanip_tu1("flush") == get_wostreammanip_tu2("flush")); + + assert(get_istreammanip_tu1("ws") == get_istreammanip_tu2("ws")); + + assert(get_wistreammanip_tu1("ws") == get_wistreammanip_tu2("ws")); + } +#endif diff --git a/libcxx/www/cxx2a_status.html b/libcxx/www/cxx2a_status.html index 6a2f2f44d1456..ad9bb36859cb3 100644 --- a/libcxx/www/cxx2a_status.html +++ b/libcxx/www/cxx2a_status.html @@ -73,7 +73,7 @@

Paper Status

P0777R1LWGTreating Unnecessary decayAlbuquerqueComplete7.0 P0122R7LWG<span>JacksonvilleComplete7.0 P0355R7LWGExtending chrono to Calendars and Time ZonesJacksonvilleIn progress - P0551R3LWGThou Shalt Not Specialize std Function Templates!Jacksonville + P0551R3LWGThou Shalt Not Specialize std Function Templates!JacksonvilleComplete11.0 P0753R2LWGManipulators for C++ Synchronized Buffered OstreamJacksonville P0754R2LWG<version>JacksonvilleComplete7.0 P0809R0LWGComparing Unordered ContainersJacksonville From db091e12b2358255d249c9f3f211f4474e2c723c Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 13 Jul 2020 10:39:13 -0400 Subject: [PATCH 124/771] RISCV: Avoid GlobalISel build break in a future patch The GlobalISelEmitter is stricter about matching timm instruction outputs to timm inputs (although in an accidental sort of way that doesn't hit a proper import failure error). Also, apparently no intrinsic patterns were importing since the ID enum declaration was missing. --- llvm/lib/Target/RISCV/RISCVInstrInfoA.td | 8 ++++---- llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp | 1 + 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td index de73c8df93679..7fce37519b93e 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoA.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoA.td @@ -235,13 +235,13 @@ class PseudoMaskedAMOUMinUMax class PseudoMaskedAMOPat : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering), - (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, imm:$ordering)>; + (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, timm:$ordering)>; class PseudoMaskedAMOMinMaxPat : Pat<(intrin GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, timm:$ordering), (AMOInst GPR:$addr, GPR:$incr, GPR:$mask, GPR:$shiftamt, - imm:$ordering)>; + timm:$ordering)>; def PseudoMaskedAtomicSwap32 : PseudoMaskedAMO; def : PseudoMaskedAMOPat; + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; } 
// Predicates = [HasStdExtA] @@ -387,5 +387,5 @@ defm : PseudoCmpXchgPat<"atomic_cmp_swap_64", PseudoCmpXchg64>; def : Pat<(int_riscv_masked_cmpxchg_i64 GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering), (PseudoMaskedCmpXchg32 - GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, imm:$ordering)>; + GPR:$addr, GPR:$cmpval, GPR:$newval, GPR:$mask, timm:$ordering)>; } // Predicates = [HasStdExtA, IsRV64] diff --git a/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp b/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp index 5bd09a546114f..4d1f47da209d0 100644 --- a/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp +++ b/llvm/lib/Target/RISCV/RISCVInstructionSelector.cpp @@ -16,6 +16,7 @@ #include "RISCVTargetMachine.h" #include "llvm/CodeGen/GlobalISel/InstructionSelector.h" #include "llvm/CodeGen/GlobalISel/InstructionSelectorImpl.h" +#include "llvm/IR/IntrinsicsRISCV.h" #include "llvm/Support/Debug.h" #define DEBUG_TYPE "riscv-isel" From 2e2af6026b43511f8681397a9b6f13525dda970f Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Mon, 13 Jul 2020 13:14:29 -0400 Subject: [PATCH 125/771] TableGen/GlobalISel: Emit enum names for reg class ID instead of value This was emitting the raw value for the reg class ID with a comment for the actual class name. Switch to emitting the qualified enum name instead, which obviates the need for the comment and also helps keep the lit tests on the emitter output more stable. 
--- llvm/test/TableGen/GlobalISelEmitter.td | 2 +- .../TableGen/GlobalISelEmitterRegSequence.td | 12 ++--- llvm/test/TableGen/GlobalISelEmitterSubreg.td | 44 +++++++++---------- llvm/utils/TableGen/GlobalISelEmitter.cpp | 4 +- 4 files changed, 31 insertions(+), 31 deletions(-) diff --git a/llvm/test/TableGen/GlobalISelEmitter.td b/llvm/test/TableGen/GlobalISelEmitter.td index 6eb84925db790..5c276e7a56d3c 100644 --- a/llvm/test/TableGen/GlobalISelEmitter.td +++ b/llvm/test/TableGen/GlobalISelEmitter.td @@ -1120,7 +1120,7 @@ def MUL : I<(outs GPR32:$dst), (ins GPR32:$src2, GPR32:$src1), // NOOPT-NEXT: GIM_CheckRegBankForClass, /*MI*/0, /*Op*/1, /*RC*/MyTarget::FPR32RegClassID, // NOOPT-NEXT: // (bitconvert:{ *:[i32] } FPR32:{ *:[f32] }:$src1) => (COPY_TO_REGCLASS:{ *:[i32] } FPR32:{ *:[f32] }:$src1, GPR32:{ *:[i32] }) // NOOPT-NEXT: GIR_MutateOpcode, /*InsnID*/0, /*RecycleInsnID*/0, /*Opcode*/TargetOpcode::COPY, -// NOOPT-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC GPR32*/1, +// NOOPT-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, MyTarget::GPR32RegClassID, // NOOPT-NEXT: // GIR_Coverage, 25, // NOOPT-NEXT: GIR_Done, // NOOPT-NEXT: // Label [[LABEL_NUM]]: @[[LABEL]] diff --git a/llvm/test/TableGen/GlobalISelEmitterRegSequence.td b/llvm/test/TableGen/GlobalISelEmitterRegSequence.td index 6556bc3cdf29d..1b7391497f125 100644 --- a/llvm/test/TableGen/GlobalISelEmitterRegSequence.td +++ b/llvm/test/TableGen/GlobalISelEmitterRegSequence.td @@ -56,9 +56,9 @@ def SUBSOME_INSN : I<(outs SRegs:$dst), (ins SOP:$src), []>; // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/1, /*TempRegFlags*/0, // CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*SubRegIndex*/2, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC SRegs*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/3, /*RC SRegs*/0, +// CHECK-NEXT: 
GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, Test::SRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/3, Test::SRegsRegClassID, def : Pat<(i32 (sext SOP:$src)), (REG_SEQUENCE DRegs, (SUBSOME_INSN SOP:$src), sub0, (SUBSOME_INSN SOP:$src), sub1)>; @@ -71,9 +71,9 @@ def : Pat<(i32 (sext SOP:$src)), // CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*SubRegIndex*/1, // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/2, /*TempRegFlags*/0, // CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*SubRegIndex*/2, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, /*RC SRegs*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/3, /*RC SRegs*/0, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, Test::SRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/3, Test::SRegsRegClassID, // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SOME_INSN, // Make sure operands are constrained when REG_SEQUENCE isn't the root instruction. 
def : Pat<(i32 (zext SOP:$src)), diff --git a/llvm/test/TableGen/GlobalISelEmitterSubreg.td b/llvm/test/TableGen/GlobalISelEmitterSubreg.td index aae996e8e2242..e8dc4a9ac4a07 100644 --- a/llvm/test/TableGen/GlobalISelEmitterSubreg.td +++ b/llvm/test/TableGen/GlobalISelEmitterSubreg.td @@ -57,9 +57,9 @@ def : Pat<(i32 (anyext i16:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), SOP:$src // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, // src // CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*Imm*/1, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, /*RC SRegs*/0, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, Test::DRegsRegClassID +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, Test::SRegsRegClassID, // Test that we can import INSERT_SUBREG when it is a subinstruction of another @@ -76,9 +76,9 @@ def : Pat<(i32 (anyext i16:$src)), (SOME_INSN (INSERT_SUBREG (i32 (IMPLICIT_DEF) // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/1, /*TempRegFlags*/0, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/1, /*OldInsnID*/0, /*OpIdx*/1, // src // CHECK-NEXT: GIR_AddImm, /*InsnID*/1, /*Imm*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/2, /*RC SRegs*/0, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/2, Test::SRegsRegClassID, // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SOME_INSN, // 
CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, @@ -92,9 +92,9 @@ def : Pat<(i32 (anyext i16:$src)), (SOME_INSN (INSERT_SUBREG (i32 (IMPLICIT_DEF) def : Pat<(i32 (anyext i16:$src)), (INSERT_SUBREG (i32 (COPY_TO_REGCLASS SOP:$src, ERegs)), SOP:$src, sub0)>; // CHECK-LABEL: (anyext:{ *:[i32] } i16:{ *:[i16] }:$src) => (INSERT_SUBREG:{ *:[i32] } (COPY_TO_REGCLASS:{ *:[i32] } SOP:{ *:[i16] }:$src, ERegs:{ *:[i32] }), SOP:{ *:[i16] }:$src, sub0:{ *:[i32] }) // CHECK: GIR_BuildMI, /*InsnID*/0, /*Opcode*/TargetOpcode::INSERT_SUBREG, -// CHECK-DAG: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC ERegs*/2, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC ERegs*/2, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, /*RC SRegs*/0, +// CHECK-DAG: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::ERegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, Test::ERegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, Test::SRegsRegClassID, // Test that we can import INSERT_SUBREG when its subregister source is defined // by a subinstruction. 
@@ -115,9 +115,9 @@ def : Pat<(i32 (anyext i16:$src)), (INSERT_SUBREG (i32 (IMPLICIT_DEF)), (SUBSOME // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/1, /*TempRegFlags*/0, // CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*Imm*/1, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, /*RC SRegs*/0, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, Test::SRegsRegClassID, // Test an EXTRACT_SUBREG that is a sub instruction. The individual // operands should be constrained to specific register classes, and @@ -129,8 +129,8 @@ def : Pat<(i16 (trunc (not DOP:$src))), // CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/RegState::Define, // CHECK-NEXT: GIR_CopySubReg, /*NewInsnID*/1, /*OldInsnID*/1, /*OpIdx*/1, /*SubRegIdx*/1, // src -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, /*RC SRegs*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, /*RC DRegs*/1, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, Test::SRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, Test::DRegsRegClassID, // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SUBSOME_INSN, // Test an extract from an output instruction result (nonleaf) @@ -150,8 +150,8 @@ def : Pat<(i16 (trunc (bitreverse DOP:$src))), // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_AddTempSubRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, sub0, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, -// CHECK-NEXT: 
GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC SRegs*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC DRegs*/1, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::SRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, Test::DRegsRegClassID, // EXTRACT_SUBREG is subinstruction, but also doesn't have a leaf input @@ -169,8 +169,8 @@ def : Pat<(i16 (trunc (bitreverse DOP:$src))), // CHECK-NEXT: GIR_BuildMI, /*InsnID*/1, /*Opcode*/TargetOpcode::COPY, // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/1, /*TempRegID*/0, /*TempRegFlags*/RegState::Define, // CHECK-NEXT: GIR_AddTempSubRegister, /*InsnID*/1, /*TempRegID*/1, /*TempRegFlags*/0, sub0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, /*RC SRegs*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, /*RC DRegs*/1, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/0, Test::SRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/1, /*Op*/1, Test::DRegsRegClassID, // CHECK-NEXT: GIR_BuildMI, /*InsnID*/0, /*Opcode*/MyTarget::SUBSOME_INSN2, // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, @@ -187,8 +187,8 @@ def : Pat<(i16 (trunc DOP:$src)), // CHECK-NEXT: GIR_Copy, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/0, // dst // CHECK-NEXT: GIR_CopySubReg, /*NewInsnID*/0, /*OldInsnID*/0, /*OpIdx*/1, /*SubRegIdx*/1, // src // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC SRegs*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, /*RC DRegs*/1, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::SRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/1, Test::DRegsRegClassID, // Test that we can import SUBREG_TO_REG @@ -206,5 +206,5 @@ def : Pat<(i32 (zext SOP:$src)), // CHECK-NEXT: 
GIR_AddTempRegister, /*InsnID*/0, /*TempRegID*/0, /*TempRegFlags*/0, // CHECK-NEXT: GIR_AddImm, /*InsnID*/0, /*Imm*/1, // CHECK-NEXT: GIR_EraseFromParent, /*InsnID*/0, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, /*RC DRegs*/1, -// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, /*RC SRegs*/0, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/0, Test::DRegsRegClassID, +// CHECK-NEXT: GIR_ConstrainOperandRC, /*InsnID*/0, /*Op*/2, Test::SRegsRegClassID, diff --git a/llvm/utils/TableGen/GlobalISelEmitter.cpp b/llvm/utils/TableGen/GlobalISelEmitter.cpp index 026f9ad349444..808ab83fd9b7a 100644 --- a/llvm/utils/TableGen/GlobalISelEmitter.cpp +++ b/llvm/utils/TableGen/GlobalISelEmitter.cpp @@ -2960,8 +2960,8 @@ class ConstrainOperandToRegClassAction : public MatchAction { Table << MatchTable::Opcode("GIR_ConstrainOperandRC") << MatchTable::Comment("InsnID") << MatchTable::IntValue(InsnID) << MatchTable::Comment("Op") << MatchTable::IntValue(OpIdx) - << MatchTable::Comment("RC " + RC.getName()) - << MatchTable::IntValue(RC.EnumValue) << MatchTable::LineBreak; + << MatchTable::NamedValue(RC.getQualifiedName() + "RegClassID") + << MatchTable::LineBreak; } }; From acabaf600b7a28079721dd934d448aecc7c41cad Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 13 Jul 2020 10:59:55 -0700 Subject: [PATCH 126/771] [llvm][NFC] ML Policies: changed the saved_model protobuf to text Also compacted the checkpoints (variables) to one file (plus the index). This reduces the binary model files to just the variables and their index. The index is very small. The variables are serialized float arrays. When updated through training, the changes are very likely unlocalized, so there's very little value in them being anything else than binary. 
--- .../Analysis/models/inliner/saved_model.pb | Bin 235687 -> 0 bytes .../Analysis/models/inliner/saved_model.pbtxt | 32634 ++++++++++++++++ ...of-00002 => variables.data-00000-of-00001} | Bin 30496 -> 39110 bytes .../variables/variables.data-00000-of-00002 | Bin 7051 -> 0 bytes .../models/inliner/variables/variables.index | Bin 382 -> 377 bytes 5 files changed, 32634 insertions(+) delete mode 100644 llvm/lib/Analysis/models/inliner/saved_model.pb create mode 100644 llvm/lib/Analysis/models/inliner/saved_model.pbtxt rename llvm/lib/Analysis/models/inliner/variables/{variables.data-00001-of-00002 => variables.data-00000-of-00001} (77%) delete mode 100644 llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00002 diff --git a/llvm/lib/Analysis/models/inliner/saved_model.pb b/llvm/lib/Analysis/models/inliner/saved_model.pb deleted file mode 100644 index 5488989454f72d8956bba4d298ef9a1ca7047b4e..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 235687 zcmeEv4SXC$-Tz*iCYSB|_AP7+J<^t@p>6K=F0Z^LX>w^xTS{9>TV5`g+_gP=xrDn* z-w-`s1Vq$|h=>T`MIJ=dBB-c{5fShWQ4!Hcd8(qKBH|l=eE)xEX7_IQ?&fwcx%34# z`FwA7XJ>x%o8QdN%+7aq*6EP$d~mAx`b6o+6UAn6;>v+oA}LQ3D+Y#=gG0%hiJ|2F z!EntqapIm}bSNxUB*U@9K)hy($gW7`$UrO{S{aN+ zrAcCWFupUPOxPTW$x}7SO8I6aQ8QB{-jW!NB$Bm6)Y4i<%`A~#%<`=g9Xq7*9Ye7` z1gPj6i0z2%lqwR*cqFz{E|Y{BS)9y#1B3B_z!Wb_xsmExog z(O_SA*FZEBj_2mym|3JgF|jKM8O)K(Tg$2)Vw*U1V>lSv5{yTJz0vSFgVJQt84o80 zhT?tU>RHG+GofZuqtNRd4)HOEICoPrm<;b2iXL}HBpJBbAx=X6R|R7sVgT7-fSS2t z6^bVrjKsomz4>cS5GTQ&@o*>*3-%jICaap&z=nFu*DMsLp@t3YMbuDuPoyuLkS5cb z#A?M-xl9zGSk1&lIKC%bSK(RVX;|S_Du-gbV*`6*%?|1AhbqNgqL_?Btw16f9+dPs zsGB2B4X`^QKod?#BGCSLPWrGg;Ef7K7qRnUv~Db)i4HI+In0 zWL<1a%82(m#Kq^X4=48y#CO+s#rg(9xIMJ<5N_UZ{qAr)7RG(4D=I6p9d$FcjEb#J zsmxgegz6fN8D1h;OJ2OgA+}gj)RLX7C0chJGYV7mJq;*!Fe1w19Ihas4- zwyxP|!ljZ`1;o1@;%OF@wPZ@G2^Xl1EK3HH-er1Y!=$CDkF`bL)N0b=M@(;OHKZj& zn%vZ?r~>MphQ;W*#PB9u|u9QD1@iK8~UL7piA-~K5m7>g8DJHX_tHcJdFE*)T 
zV2KTdYo^q*kwxpu4d=DGt@M+XbsvThHe7yRQ^yTW&Pqw>cxRJ1B+hg?q-lXViRIB#>;rg~!( zu?}jAlEV#WlH~BfnJhUJI8%u36P;5fhY!v)$)#GjH|U~b-9V6L-Tnu!PFFrJO^oRq**0q*TYQas6df+PoaM1#q!tMtZ5 zi1VDXG&k!~-#~vad5jZf=R9c+TBk%(CsvI(#W~-2OP$ESrB34AQVaCA6du%Y*H^IT z7tx7ZznpYcxOrX1luqWSRKrVYp+2SJ+MZ%Uf6^h&a4wRjP$LEU!&NXxnf&FoUv64$Yo z_*B*sFJmq7a@G>BAT9AkXT9WTgyWVR=mwo0$$@Dlry@C8#0sZZD)YGE`rvxRjmUR{ zvE&;0CD+6&xn_OI%|>D^OxISX>uE&S`S9jc1GdCs;%g~qvWFi%3dN78kod-R|Pvwdie+H-ScxRDk(JIr#lQlFzvU5>c7;5=tn zp&W7UPf`t8E@rTNNi1Ka3i*vysGDDfR`IIPYJC+VGS?Vop3RrJmM3#vw#@ZLndk6j zZs5s0H(TaLqs&cwnVWes&&!s%#VGT9zRayWncK2uZa2!jfG_hxp3IA~WnOHQ8Q{z8 z;mHhU%j`AE?BmM}@nnXxW$rM_+{u@@izhRZE%Oqi%-wvMQJ&2HY?(2m%mKd4L7vP@ zvt`DOG8256NuJChy-d~)(AH~DZtG>}Ocprznzdg0^sU!^UF*f|hUPgB6l%RVeH3X? zE;CxDGvl?6-1=tdd0fux9EB^~WaqmanHo3AdA&nd<+$C}eCN9zh3XuqQ)9|g z)o;k7x}k$}Ip1Td`g;wkzfZ3^w|AWHJXlb5PT#3h{r!1VH}s<}=Z&VSf54#nO?uV2 zo$!3;2Mems>7sS2-<(HvLwD_Re#lhyTMVjySg$&_ub=O{wV>*pk$_J1kK|F^Fe-34 zZ!=Z>qXyM)*Q?GQQp|VWQBZZxfJ3MHa30kSgAbSUPE*zIGN^vHUUlxsWxn&Cf~s@I zH9FNlmPd8N7{}$j*HrcYF{pl@UUlvOX}BIp6t6LDe~9PMzvs$fLSp{ONLj(Ny&>8B~8%uR3?|I^X%_ zf~s?dwmQ`x%cHtsnCo(W#Z>hXgX&+^tIi!G&v!mvP<74-nyH@or9*#G?R@7`1v9}J{F3<#{rnA^3()&KOa^IB=gF2~IP7wM!!%ppG-T^r znQRprOV4+HyFk7QjI@z4@ET;Q2E2wgRipR$EXmk+ax#`Z^maL)G0oX`4LN&OpELfr zeZKR11#`w5)n{tlv2?UcToCLdBv_yiONYYYKyNS+=?nBm2l{pg`UZw#NlCvLtN@Sb z(DR5@by;gV9AZ*D)tC}iKZWSRI3&D3z?P$d+oMTi6^U*M+89#3P@HK@bqKUy$)sml zFMh+k$0D33VkM)G5s#{yJ z+>YHlp-5u%_lhI;UfC|rP$h>$JHw^D4|~M9s_aNC8i^6&D-_-l9Ez6A>>2H2Aec+5 zeWuRDa1SJdG0=3uc(A`@xAYcqifW#CcyBNs8dYV5F(>8f0^tdcFd9ZefIA-A@? 
zR>5B=%UeM!n>G7#F|DlcXOn} zX6kf0^{lEW8*7l9Q}D9IQJkPSs_IS@=b}x(U;}OEo^YT)7z^$U#{-d2Lc;pwf_;6hC%30c_aeHGjCq6cuSdh0%zTQz$0-R6#dfd$t>rzz-JX%{=7Bj3k%u>Ny(1!HI1)X zJV%_G2<|~;-4Tg~>FRt#6V6jzx5Tj0usmM8b|T-_3g6aQTF55L=;hI`ESHK=TFPtzWZf)QwWo-al&Z=q2mPxwR`E)+Tbx%< zN`C7vCC%ERjWwc6->i`pFMW8@B2}5TXda(ee0>7HCVH*cL|rTJrt0O?7qjqsgyO7Z z{aRMTcI=e;GE(YGHZB$y<|}pdnpjq=7JO5q^zm<1h<8pD1D3izUo+lc)7}@tTU>ah zDT0;bc)@LdbroJMy34`sA093n#VA!vs_uB{QGCSq^&6Lqr|NA_-qNF!EPEr#UAkcf z9ZvY%3IMJ-MkKP^f^6mEu_@GCX)-GEt;cY%>*Nu8LQzPfJl>4l+;LC%SVYnI@Ifi^yMdh^^dl zXEfN~8w%Ek_YLC3+CV7MpQ!Jm$DQO5=jyvEy!K5$Pdx!IsH*R#NFCGhj)~AJ{#W~R zD5sA_4f*d5v2k>{ki<-mWZk{!l~;~VATdj()lT;S{P#Z{czw&EG>MR?! z4eUB#*8#f@*mc0J19ly->wsMc>^flA0e&55YZC+;N6rQ**!!)7VdQr#7(sk1IE-+G zAhZj@XfPe|^l;&vzzwtk8-N}iQaHX2xCM9wcmbH-p`2R8tb#n^5Eq%vCaer5l2SSO zl@+tF=}C&rWCM8p&P0+-r#lG8zbW2Fjwck@bcRi=oMKHg85;;?C`!&WN-{na4n_9V z?=NEB%Tb3H!`YrJlRHFg!y?p`3nTkGtr$3+p8QgwZLQQv9yPR7iS6B@osm6qt(log zVYKcCg<)DCX-zT(qE2l&OiN&CfgQPws>}CVs7ISFjiVlsUmXEmxWHNuU;`b?`dX+l zH1+2S?b4=Z4D;C1gmpvFqEsy^u=RX_r8KJ$P*?t#etRg6U9lv#FPN+k?u#UfxD`yM zfWbML>8DFqexgh~#aejDxsvL;MJdX%=JyY&2K4^nuMEr715ycPpEZrCWevC5da0~> zg4ME~pwwEmjr^2DJl#YotN}R^6u_BdQ$+r+Lu?*xPIxqcPwpD6$Wamjt)tI`eC^4m zw-CxQ-MQ+?fcDX5(@;wp+ik*i@aX^yc3ls1HQ>^r4aXXKa7=Iz7zPL(@EWkFLouJ6 z)6Xo(PdZE|pb%b137Acc>7{3;97anDm`{(X+R6A>h}odYs6()56Z!b$6sDICia?&efeCIj*zOEpVJ5-c~*sH%K%G=Zf$M2P}y zj?I4~m`xBPgK4UuIECfnf?J(Jb|`p5*W7Z1F!;Dbw4j4->b{7Z#Dp@aFEVBM*)q|L z6+=-lU3Ij9h5YI2kZj@JN>c}Bq$8^tlsqt7;-1!g%=AH>bt_pS5R_wuKq#b$B?5sb zfJY!Sm=g#*k>f`o@KUryASk0G5O|5^bQo1;j541<;H{ey$Uw-3Z3DXw*md9-t^;k* z&4z6QeH}=lPS~(!gJIa}&4dx;=V;+D(mt9HF!5AMkcqGHfD3X*12^IogEoY>f(-~) zEP4>97z`pj4=G&B2eQW43(KP_Ba$$9hc0Il*)lv*2o|#xcQs6)w00i*%^P@iG-| zxCA~G&2TlGj$?v0pi{*vxCCo~4Zvm^^Z+3hyWmEFK_IE(ZEy)vz?Hz&z%?qa zhkFC?Uf>{bBXARNGjJ<#8*n=?4BQ3W1KbPT4?F-I0zLzL4tNB36c_;>2c86;2EGYA z1AHHN9{36HBJc~~*T5^lpMlqbe*wZuK_~+z0+m1&Fd3KzNWd&$E+7LZ0w)6tfm48` zz;ZwVnt(HacAyJb18e}!1GWPJAOu8!0bmF?uu@2+;9dpy8sK^yzZbX>xEZ(=es=)( 
z;P`&Hp8_5NzJT+u0AB~bgYzE%KL%dJ`EP*NfY*V4uM|ccoq|x&DYQ)jrgsXd*}%L` zVPqk&6mY}83GQh?C$60hcN6@2a2$cVALmzh3Wu-n6ox<0DfHaYDY!lfd>+S7!~I#O za8T$Hrcdb--1S|;$kr~wd3~3#;oDup8<+WoWuNy8!h~)s1cGj6!4iR>6h$EDq-Ula z#*si^+F2qHlp{_c=u)&qAf&?=K_D;#(Dl)#1VUPV{r*hhl#)P5Ct!&{Q0j{z5YnhzBG(Qpqf|iyg0>OK%5D0}7u|y#71n>xiMsoszCvyA<1YU}k2n6pa z2?SoEIUUA05C}YC!S=^M-`>Q@wt-y->^flAf#bFgv|+N>MsYSs!Dcp&z72*^4&#jx zq%mGN4B1Bu0w$+RiRq9xR=6Nzv~VNtXwZgu#b5)%mjXSIQwW2&ZiW=XSiyA&$HPHf zvx-{~mWN?n%f@~1%ZEeo=iw1t(=dYLe0UoE*?1Oydc1)1Y#fH4hL>@y$7?v(@CJ@G z2pzajRFuQj;KZ>WlX0$LI*v8W!m);G9P2S3=L9Yli{KK}0?Smm;rdiG!#!O^8{AG6 ztKbrBP_Y^AR^UPvA-KDMC@=`@15&`1z}3KYzzumo$QnY_5R&E=;5Ohc;6C61;9=lV z;0b_`FV6vlaQPiTNESk_hyWo|2!TS#6E|=g;0Lw*4G?03kQYlpFr+{< zyxwVrK+w%BSRxR-MG*)(>6s~qaU>9!c9sYP?-3^ubSYXQ5Ypj`AP|@VtPu!l`Qt?( zq!X}2Ab5)+5YnOLK2n0jQmIwqbMll3} z!N%qU0;?F-2n1?mYXpL(`H>(Hw6rV{2)<*5Kq#b$B?5sbfJY!SnG*;+k>f`o@KUry zAoxZ}An+2+^yVt#JO%;*!$kYfwt#H`+XA))Yzw^gEYOB5+pukL4Ay}Zbb2#m82KA{ z6XP(cQ<#)J7T|&&M-pzNr=bnUrN9QrA?O(+Qiwkm97OzLaSP%Q44dFSTt8$1BM4In zglKryB3?ka!+Ai6hSxYCM1w{!=v`-rkzmlyC&OPO7l=CxsMcXVj$JAi!L3!X46a*6 zGhBkxfi@MZ;1X<5u@&xxDth3CfL*{K2m9bpkOHm*t_H3H2(fSwxCtO{dJF^i0E9pw zZ*)8Yi~vsp&jK$1F9ELt{{SX{5SRpLB*GFL6B6NcfRG6115w~A;KRV90C^LFyzQ_X z_>3h2K{vBti9qlbMIh*;XQmv+kw9SDSt1a8N1Q;=rD%yjNQWA`r9~#SjPv z8=DgdtYTOr5U7!@5eS;*M}k1m(y~M#G#o1gLLo&g5ePg1JOZKFoIv1-96thqm!c&C zp<$E+0x!`_Z$5Sc!Cp;|!K1yBnA$)f54<*P8`yQgt^;-*uZMmRGFgh0u{bew0y1wR6}iZ-|#fF58FNC5|dVc-#f zkT%sFgz&KP27+#8!4iScP!xfnlb)G!7)JtuX=jN*XgJ~of-Xf%1VTD|5d;D=fHeXk zEq}ZSgmeOy2!w{B2!yokk`o9_4@(3>LlFdmL1A+O!4SNp1Oips5`oZg)CdGa%9aQO zEk-c}g2Be-1OlrV)(8Y@WNQS1rumT|5VW)`5eSXP3V~2a5laLDPXLcVXfY=ccp}G- zK;Wfli9l!^C4s<8G}D{g1j3tX@?XHf)yA=BgEo|ljndm71-shd+h7>>c`ITB`L;27 z8ytrHY*@2_fC-zo0;VIMZ*{m}gW~~i=y@!n4SF3(Y(RSBi5|!w4-7(XsgQ!KvEVwy z9}5m5ez72LS`>p}gg3)|2vZ1$aJ>-76s~qaU>9!c9sZ)#v@K3 z=u)&qAf&?=K_D;#SR)Y9^2dunNGD*4KxizAKuF6jIf20RutXp<7C|5w6gDRi48coE zAW)?(5eSV(jX*G@Y>7b7ViZFl7;J1#Ah3#IjX)Xs!sKM6KO9IT!-D~j+nhn*$s9ihftR8s2BB$`3<59FOm{wJ2H}-U 
zo)lgHo&b7)MZndOCxy_CC-ENV<9Hub#VWWOd^k3P3t^6TOhz7T*fy~1fL#YlSqEPG zDxNNF*ftnt9e5h?-mDl#-i|f~p~I1c3+Y+K@CfX430U@VdzmxWX~?^&-F9M`eQOvTndm`=3?MOcq>?iaAwf<2O30JE4UKjiop#C&ja}= z0l}~i%dAkKvr)X%*xv(LV)#4ZH@By@m+LM+?YTpb2IGbAWk(OT}WibwEAf z1I`3i0UH6b7tw_(cEH^Ys9$>)uEfU;-i7n`!@UK#3&)=Z$QPZzhV$p({u=lPFd6$6 z)c{@WBM8FNmi92x%`8}A5Sof&5OmTrQx4TyhJnI`Is35y1s<0mza-{R2vWiQUDpn-2&VLdt#b`72Mc_*~ z{xaOh;EurkDqJ!OeFE;2aGwI6#_>1dehcn*;64NQS-9VW`vbVo!F?X?kKq0o?oZ(U z6z0!x89TqigcScWjmffYbK!npwt!YF_j@Bs}7N6-i~0nG^80<;3B0jC3J zAl{k4SwI`m4s;;yN}vdQJ^1jV!!~x4g!|~ zafC?#NnGCp?8Wtczm+5i8}I{$zX`u@!SCCM`yKcJBhSG9yGY|% z_$fAa6S40j?R4 z1GGt9!eL-=Ca%pwJfLkh!UKbI;5Qe3KwCA!0fQ$%zKpoQhIvR6I5;2roQOEU;7Pc? z0O2@KaE&Z!8u4B>#(a$H-1 zYrvs;#B(D}56*#M1!29ACj8EV zA8@D*e(mtfl-o-!w=t2A-++Ls$2mxV) z*#YbXb^#H@y9DufBi$&_51BDw0J>g^bmB-efwYs*VF>c}0DB>4AL8vt_yY)g8NyzU zaBoN6-+{0x_yavxbO~<&*Ifynuj&#W0XAISCA^YXnq&+ z16+4KhmfYc2*eh-d;!S^EG`?`dufvpFT?)#w!&~qbn{Q&X-?7InJK8WzZ!JDDa zhaew#@`(fnsv&hru zkoM;x`;jie`31-V4u27Pe+e=l1-=YC)+OBc71(A3_$u%?^7J*hPr&^;7 zf*yZH*uNs~-;mDhNdFC_`FH5_Pw4$G$oe<@{tF0xq1xdWo&&Bf^9#NSe&IFXo^rns zo#+=9RrrO&zOrIt`q#iTG=n0J`XbHL-vV&Vb)20;SJ#M0>A~C zC;NqO*Z75}7Xpj?Xmk9+Bc~vpC4S+STEB4HQa{=n_?-$Y1D5-RhgTq6J>t6k!b={= zQ;jecRU33@c+S_{%U%`Y^c4t>sm-maeYg9Co(qwWix4j07k=7< z^IquFhd5yz@5J#gqYcf1O|0{w}`|yB_}UMxJkg&hPOHH@z47 zz0WVaaL_N5--t9mfN(cK<_A%xH~WR(eaJ6}A4Xnp^$S;i1a`d5FFf{9zwpi5{X)$! z>cpLX;qki=?r!MxG1%i?l<$48PWl=0^?$I*&;7#smyq_aQI5YwUHd)q`6}w(pP|J^VAJ~Om>K0C%(k(2T)-ANm z=oWU*>=quJ-7U1GxzF$m2?F$g;8nJI^HWDuBkmKcQQBhDb`QnbV%q{A1%ATR@1V-V8v$BRKoCt!&| zXfBFDNXsrcgTVB##2_>m!5|nEHfInF!Ar^@P^B#~2+c>0K`^9ji9yg}6vH4GY;4XT zu!>=gL7+yq#vo{#9|;CQOUn|2&~mIW2!#}}#31kla2W)}oI&8p96ttum!kZjLtJL| z8b(X~fpB~v5r{^1hoy-t2V#k2-AUrqz`#&)a3~o_>zPpHWYv?hqBs@gZ#cv=%yjm+k4OzXYmA!6SCxq^ zqs|Mr4tiz(sD+NQ5}r1Oe8|_IY+eOWq8&qhyTi%Ifv{ASK9oB~ozu*)a_mgj2C!Jw zV4!*c!d~42JPJGuyaW)2Y694aNx&?Cuu&%fCjkoqvPOuoPzvA!nt)b-uux|LZ9pgB z2UY_b3$+=?=L3X+x(FZ)R38uqb^#GU&q9&)Mf-pQz}tYg1F4llO2AYMb{;PjY<_@qf)ma-0cXXXQS@HxsHtzK8`S8qf(y$R7Q%hQmIb? 
zI%X>M8Jw%^RO+)h216x00+&F?Ql%b+|6{;c;6DO@sY*Q#d<}kI2c86;0>DJoIm9Kt}FFV;9m&$9{_Atn}8W(N2f3Xq{?tS0Vu~Y zpl7t&D&g-$I3QJpYa-4k;T&k243}UEu1$siG#mqM({Vflkl+vK*{#%UoX-K~A}m2Q zj!yt&grA3VfG}KbC*u4hoD(d7p9_8`!>iSsV_`{8!uoM08sSL1vQ!V#PezqL4D2du|A&~^@vH{cjZ zoeOLPHo<>0-1FdW!S(Zjtq4P~4cHD`fa@2+y$J5b2p0f)fFRI|@O`*O5W@8^{B{64 zfnB&3fgjLz366Io43LTf{qT#yZvgHfa48T667Wj`LkI)3?SZ=&*oWi&zyaVg$N^Gs z11<;N4!i>_MhY(Ax&llGKp3>PE5TX-gh3Ooh6@}9dfo{a=(z?i&~q)q1HyF(1GwIW zbD-_|PGJ~uy&End+yEEQ7&X^>5e{g3A6(!t&~p$j(Dr`#0c|(J1zaCMT)=e`(g0i^ zgbVcCjPO9}LkI`BZox4yqN44?hzGcCg$rn`Tib0o2lULFa0lW6J;RU*4Bv_CcfkdO zyWs*o_u%+rI0jtz!Ucr?feR28E(IJ0hChz5_rvcKkO}lW0DqwElW+mT#vKNFJ`ERW zI|LUXtlVJ$->d|44;L5#2s<|dw0#yX!RHY7^9b_@(gFxWCwvj%0pUxK0i+&j?kPewRka0;$XMSMV*hA@C@I{bmdfO`g9V1tC&{h2rhHq3$yz%?7;<{;i& zxPVZNv;g4*gaHH@`A{)^9?}C+^PvN9=tM9!C*eAvXZ&7sAw z#YhJr%wOshoG-z(T7&@(6L%?es)PTjaF@XkAk3d@1=0kD>k-}qKVZ57Ilw_L;KOkP z!ZjieaHt9SXhxV8TmxLK@COD@gPhY5{|xw_33+G1=50Vb((Hf>99)U(oe0+j8QqY( z3jV8cyas8n#ql~EuZIh`&Ous$a4zBiu8l}%6OK1S-g&sT1=qI14|sMP(%TN5E`V&n zeG$@9k-8Xh0tnLs9fPn#FRt|=Y#3pI4Lc!w7u*QK0MjplORyVhL?Npm$H3tj&IeG= zg9v*m(vCw{;9vrBlJMJuu={ZR0Kx+9%MkB!oWC7m-hr?wuuH(pz`-lP4qb_~u7W=x zTn!g!ekc5{0rLaA0l2RPTLfHp9sJ*gxWFyfgOz%B28VGxd;`M02Y&BEUV#x{@E~-0 zKNz(e;eG(JZbDpuuz$lq>SpK(yrIL^58)c%yahUa7~z5HTOkjq{Ro&XVBc-<|0vFH z2lE9q-vQl*aSgcTPQD>*Tf$8@^*2iFjd!YxA0-XN?djYi{hkfox_)h>2Anqrj z3$WqSu*HM8{u%fKsfQ5%Vd(K$ga=;x9PIZ9^7jS!e-Y^dtG)z03V)#b%SiVzgavke z1^SL4?XSY`YdC%a`hFex26~=Ec>=;yhzm@A8rOisz`<`I&Nt!rEnEX?zYTl``T*6> zKv&?k@8bHi$mjQ<_xEx99Ay3w@t=pzKSG`QF~S2+{}g%p8T5S-{)gfI9O?WL?n@}o zUqQ#;Aidw=_zwvCD$4PX$k(5c-d}M3SIGDq^71;4{~zT46W9I?zyCmY0W6*aEDn$? 
z1B(NMCV<5$_Y1E8?ulSufVWkEwE_ML+)xPy2B>v{c>!Eieqp)@<_5R{I0)P`1#+f> zK?Fk6!J2_7Yy+MEL+G0W=4dV$H{e3BgiipqCm^g0h6;F@U>?Gqh&T(t^Z@sq3^uC< zvVk{%+ZG}o@DlJE@CGor2=Rcw0>Way-~>8Pf&UUPkxOx0hxASbdj=Nq@N&pofp8w^ z4=hrUS09)oU_*;v7zQ3b4RPAQPPKzs1D*v4Ye?`KFux0|m>+T0fT;vy=vxmNTfmM1 zgXbeZ7x*!jL>?}Nu0SY^u)AQhOJL94kO8#C;Fp9PFo2!=8Jw3vC*btABfmg-3Sodh zUje@>A@?fieYIbh`YylFdOd9aZZKxR7s2|yej{Xo@oT&Z`rhmpUIkwKFm$`aFO=O4 z#_wL(66pODu7kNF?AXgxun;^wVUd}27%iE# z)Utl^off9|kojbMC>)CHso$?n@6E}X-YXZ&S_R{@-fV4JZ*qr-X+5E?To~EkY0=*4 zB<0r}Vx0v8su41T)~OaEk>a7V?pBH@tx~3l)QKU7*?gz6te8B=4_T{Xn=Xx`ijiM= zg@w$j`WMG)S*uJ`hr6bowf4@IR3>xQDm>RfDi>&#ZIRlVg;I7BO!?i+~p1(TF@ zDdLWqP&ZGzW2BDh&IyvxD*jjdb13d&la`+^6PH>j5T>n;-LjHJU1gH3+-anS&^yh_ zy^JbC39cr~+K-vxI<*K>Sz{PH#|*H zuq|L)z_x&Gfn(7EZ>H}P9?yBfx+yH58y~bmZmP}FjJIxX!5YKh(Z4NYK7Fl|pP6+S zPlkc%XN_U-9C?O8m!vg@AswL@hJhKuBEyhQVN@^8SYa5_DOh6|Jo?vW%qNA@(o4@U zFim7j41*{0=1h?~TVNPQnMW%O1J&Ca!{8a8w`Z&{42GnwF$`Liq8J8)p)D{BtcF-* z7^tZ&G7OsbM}%R}5|i(Bh;ALs5!PQ~pwg$ftjD z71h;&dT#!r!T#P*P|aGpw~{?35dxa^iin~VpNhBmD8AyOXy1e{A}`ukV2X&MZTnV4 z6zx5)Occ|>hyDejOfqB#fsK)yi`@DUwtvb_)^sbiH4|g5<0mCgI%Rik7P(#{-Z;z z$5}4k>$7 zDFcHzO$-dh`@#jdz#5cyNMGI#rEzpR%YSx=i@9Z`&#Y9zGF_k@)(T0j1nFXuPq&mp zmN{GLOM$I!tA!j=mRQV+{9A{}Z?#yaJ0ihEdnlBpQ%(_|X;Q?!5EBfAxMj@0pR>xh zX_Okve=XF;81gDrs+pGg_o2Q`5 z)Mi@Tqi=(<%DQFLTFd_`G>XW|ai$&O5Baqy?3U2AL}!^6cYeDTU*3g=6qh=hV_Sz|@LEtW`I`=L zZn{DkgQnXQZ#qDiI!as5Ms4*h0xOuTRwXmFWRyC_9XC`zNyA}i_X=wzKk5*x)7H)ko4M;x&IyupmuChjSyIMqf0?hLd^eV&Z`u{v zkqn1cV)A9@Ks?eHjN*3b9|(o(+v7Xe1^1EFi+Dsp=c7HcSy(J@vh){oyVjGHzvU2J zX2GQjrmdW7>+FU__L>RHWI0c3#2?@~lp1qI-p0juf?<#8fSO}<2wI>X6uygw3)B$AQ7`e1S( zT0{-WpQl;*q~@j-AD6U2oZI%fi-LM(4B!|Nf2 zJz-{O^OMew3bJR6B#?f~QcCi}82VcXslJ`F!a~sceVJ0vkY*F=aJ(trx0&u@!_An2 zM*s_H$k#Z;`J8Z@$R0YH1q;+Ry$vVS&CzT)rDGE2c*zuyqZRYKClyXdzRDqb`i%G9r1x+s1I)tp(#yBQw*o_5>6yqk^xV; zI++Ai(gEwZwpLEDrlR~^jORFN8I(u)X61%=tEo=##V0&cCc7Rb0!8{XZ^w@y}y^7Rg}hGR%I zXmDR7p($o*m}2Ny$s0=OSQlAIMZUjGJh7lMdTOX(z_dv-74K9|gsf^!tDelO%(O(! 
zW$H|q4orpu>!q^l3EG%oLV*firYtZq1=)6uS90krZ61&&=}zQIj7})0$-8-&;s%{b z&t>|~Nr3{^r;m|9;a646K> z#wy8R-)?jL3>(++J6Md@t*1)3-euN5w-X7jSH?1=RI5ynz~f zh+$7JbXm!Aj!c6!GR4gcuBnipYA&;=LVdXW)Ib^MA_qll>ld)Qk9kh(4 zLuy$m{Cv!B#5F28DbDSSMg{}D`vY_^Rlg3;(EUUGW1bUmkjXZVXtIm(?7Y{cN;Q3M zdyzwby7CIjv&^rsspfJtl0)J&V+zG=)R2E>VMdNf@<2AlkPIw=8yh7dpJ4FU3W$zI zs*RWS7g;@~Wu+-^6`iXw=a-Bm_e<#`d4?es{Q|_oCKWT%YEBfL7!feZGqf}^>b?`% z4@6?REp3xxoUXiZjLR?D;MRQTatczrSfEc87mBk2Ly2%akQfa2MRr6m%83R0!_p+F zDv8O-Kq46)Z0*9VN=d|&l}5~j(ugT9jhKn05mQkbF_one<1CGss?vxNOCx4dX~aw} zjhHE=5i_+EV&*%iN!23fvA%V_b9%w6GYVdn3SOOA@aio7Rq?4Z5!1W@w6iz_&DZE@aeKY;b!FnT4Z%2i0+20)k%>m7Y>&Dsal+=R6UDiKzJX{oOqL3SG3bp2cS3X|l%SVqv#eKvFQHvGA-68 zuz1VG$ruLhi0lktX-slJI#HS@R*Dl=Za8ncgB)~|17%{J!@WjmcGUSq#}27}<8tv- zeU5hw#h}$dEU-6{+@;Gu&9~B`pli;`cHNBZu1OHTyI6d^rBFjXY z$LE!%iIa9jqG2c;-VxbXT`4UU7n1;c6cRiVqrC;owsD|*|fZo9+r zSePwz%UTIl?T|XeGp$J>jAdR;9Fkw`uqMM3D5#6&9C-o-v}qKTY;9DUvT*7?(anoj zAT>#xs>br5PGhZTb)`I26t%HTz*Aj$ox^dLLs}P(?+kBIR_+S-?H(M6#FB}-92J{T z)?1Xj98PwCN!9l`#95n?SU=OZF}yRp&p#B6f`9GXCFS*%_0^|l3OMsUc$RKOm2a&B z1Cjk10JcxqY_W2XEN+d*BvEo+oba~R*RQBk=HyRU?i1N(1=I-|tEuP+rAi|iQYi@? z6ITqP@gTI|N@6tmJ-9>ZQF?}DROy_qkuKg2kCUWxY9JZz9}Mj9^)-i-aHFrq>+v-y zN=rj;xUo6t@q`v4zXfWn6yxm>dGhPE0**KRh~-K&zw(Jvy=A8 z&XBr&88J@Ya#FCin5iHqMUl*v#CEPh#gTN!hVqxEvEYSLB=e+Y+!XPdh)g>2c2SHC z^^=G2I9g`)+Y%gQjkD*u6V+u3WM`y-JD3gnB91vgKiWSsK(43hW=d^cWtWQ z(!OzZd&k=PfAt zW~_oz3ot>LQ_ zWOPFAPAuh|ugcNa!gK|fU7S=hA*>B-KtmcA0kda3*h>?pPz+^Wc z`G?-K#R~eppoBC@tWqkg9Q2R&PpsNta%yf?%^@pMNCRKm2AxhCa#A9T&S&IYv)HI( z&6c4z($vpX-?237GxI1$=&{-Cra@x*n~GyHe$#GXoGR-)TB}goaf;e1I6CGBR<7z= zdG>~LRIrBUMNqPL_dTSyxg`sDt^ci<(IRssLy({c3OI65OAqUZFT6?$Gv&*^${dX62dvDPS$?eoq^YepCxi0QpClax%nxQzGN{w=itwhwJvg&N#M@gF#u;y=m0iqxT zIpqcAFw?o56AQ|zg`A3la+t|o=m`qxQ-)kS`MDPBk(X-yd{%m*43E?Krof(>BmQH8 zrP)NK5t>zFEq62=4)g{Sk-k80bf9lH=2M1ZNz8QBii^~%eFL!&opD6q{ekelaNiJ? 
zgR|fXgq)5kKqiM!FvLfb94{z2PUdbbNsd!;he)0Yk`8GlBFS)IS2(hB7f}^+fqess zWFU-X#{F2O4zJ0gGZaa%yf`Gx4TVEH!|A-u5~oLEQE;lUoq%lfhUrfT@aLKT$vwE8^k37`9*6r!u;1H-UT_630cjX zoHm}!ts=6e>LDAdhAJ&jwNRy*R0&m@S{>x zu9!A`hBR|l-FQ#dH@CES>BDdSDo8rpbO)>JYF(Tvo9_PP>Weu+w66Be#(86B65jpF zy^&itqaWT-p!;)(3a2lYCGLIR+Jlby!h%xIT?K_L;Ru^1 z8QbF4P7HL~3W}=Xhyq=rb>*#oXBEh^0*#@#h$CgPr0We@*E&UMRY5s*966Gd)004} zNu$>)Gvq+oO&)#OWertYJDgLbGYe+X#mS;mQilOpF*62QK0P$TAouvA<-avYOqE+^ zsY%C|Jsi}J6ph1CZFu}<9C(*-sC^8Kr*E~#)0xLAdptdqOyB`<)Z=M|EPh$O5g*>s zKhqBmMECa(#0Pib5uEJffn_i|Vq~#Ob4ydRJ(|V~oa{ADE5G?xkCK)BqZv()Y1L__ z^XtIyd93|#CkDu4A3TkHaGa;ZD#75G%(xVo2P(nfILzjc@OV%$E{_M5P{a9 z&t>vPyg8zOE>oYyuz1;i2s>&IVdsF-IJ@91=5o#~=4>g`Dq^z(m8Qi2(J}e~V!CIl zXCsf+^>{E_Q&7ES9Q9^NnbsJ)i7~3W^%C$GI||BM#gXThGA((%ZlqJs8|k#}0jm)& znAT=aTBl2yULapu1xHA&7lZ5AUNGI$Iq5EvGCfGXbm_$5QNc1gi&I8Rq)g9~Q%1UxV`~5pj~5h7;tWm_i=|9&YfM5v;%vPT ztjyMe3AAz&sF5-~wIPAbNVT;W51JPjl-|aXUMppKd4u%q5iW8trPMjdj1XFbAa_b! z##y6_nqTCZ*sU_vPcaV1#1?vP@W8foZlu!rbgtnRH{v zQO-Bd$54m+^0N4eS>mAy7H3chaX{F}^l$}>)9})3`V{X+I*cAlwucu_!-gonY>$R) zkH&0|rfiSqY>yUrVD+F*F?WuSC&x$0@gdq88V%eL@6x?vf z-c*La8IirI4Bs~PrZNH^o<_f^%x7}n1IfS4q{Y{0uUjK8jJ&zltzB!cTeH`#v2W^W zn>CGL-I~ph(C-LX8KGJUV=q$6+NDvy!rxw`W-n4Z8|*^~-(j&Asd1PHyc4721I`8? 
zFlwv68D{>UTO$6xTwJJM1EB367{{x{1K5y}ZXwvzn7f7G0&!k2K{jsGHfO?DZ8MuX z;ho;o#pavH~pL5P75OQkz(uvnC%ttgDSJDc zqBj=GDoP96jbV!ke){q=4lz>$rOcsm8^Z+|3HdkdS(pisoyGKf+Sxm?xO4XFz}JQH zZt<;-iR(DKUSSKYbn=}2&}XItw6YA~OSqZ0!#UqW0#&D@`^W~AIjx73<0BVp>g7E9 zsOQ}SN53;*l_KRPEMKVKBu4kTe4dW~6lJo9l zV`>Zs*&QfXGlMyl!sbwlHHSjCO3F!8$zzUDcEmbk=)PGpN@FMnV|WW2!&|H|yz-%Z zb;Fy-7-JY&=nRti&P?4X4dOKz#8=oLzG4mH%YU=@@)#udX0bR_(wW1TZcj&R4xhms z4Ta6oP^>u`Lm;J^u zXTMRt51$=QcZga03l=>h$=X416k8o)^$$-Q`6he>RDUshbTL+SIU62gBvL3<`3Z-p zAFOi+PABW7$WCrPG=rF zpJsg|2>bM5V?xd>H}S{k6>|pFxos!es4%-~VTZ;%etAPGc|t0ALwfUs^yUre%M;R< zH)KPekPUf5Hs%T0i0=Yq9(Z(3k0KlHZb{!l9f@ri=i_|`?ixif))NX|EiZU=V!^8w z1+P}}uZn*t6IU5`QPeL!#pg=q8o%;^?`<&VEYH?^@}M|%V>lSfOfE@T=Q2;4mF@Ja zu;pg9ylt7~sJdnda}jziTIpxv^bfq_xVjbPTwtYY%|z#~%BD}2*NW2<*xfiD$n0h= z&ExFi7ErL&`3by>-sZBJ1>#I14IgWyTj2MT&$J8-O88m>>#XX|6lVwMXI``**xfNn zg5Yfd-X(kPnYbL>U320eU zUrNc&n)a?rMx%zD%|WFav07&i{re}GXz0KU+lOLf*Ew1w!y6@;C`&Tii=qLUp@vSs z#^s*wP|N>R{m!G(iQ-H(sP0XYjMS#wZAnpiPf(-eydIK?h*UB}jxLpqr_QXfQWjgI zvr^U-U9^ST#;4K=;xtty^)0wec9-gQM8z(!k{T}a`kj0F1ZEkjS)8HEQC}p1jc-V9 zIjulO6Io`X>>bg8z4hdce(d5}k8e4JB7I2=MrP~`C$Z)M8`cJ+*f{|Eq)PJ(M%gY- zB~dbLjkF4Hp-;ref*O6&$s)JS@=%q8Lb6-m_(O&G9fvp*FC9(`q!*3SuM~>2(3egp z$zFmF-VDU|GvAr$L#L@9{t5-KXe-jUU!00p^~8*VkT+49mh~|Z1gen4^tZzj;w0zy z9g^CniF2Gka7ekPfu!fKJ+3kTHS@t2?hMDmaqLJ$nvcLhFVRsdcRKJeL)HssGKiGfQ4v2b$lKzujPsCW|~ z+o&cPl3J&Oa26E3h1VcYqU3PHnIt*z#EaLxFjqVgZ)7>VaHdKQADn5DqoL*`aZV=N z(FisoHs<!Uz`L2eTv-tO+1{LI>kJGgs+XIEZ94IKiY>A%c6! 
z*TGcN(g%86kKwCfxP%PgYj`-ntcDuXwKte>(Xoq*eBBP8<8!5hkn}G{oJbP|QW?kwV=pTEz!KeMWy{x;2X>G--^?{$JroGMkQRojXF zp`6yQDNdKMz)t2DSPieh78(oer^W*Nne_tuZ-t1_h}NLeI;A$JzpBV?PCH4x2Ks@ z6{1P!y+VzM?~`NhL>(pQ%j271N zq$-@%HDi;({!G#^ejv*SlM1R2tr`2mq>@}#v)lv6=f0fTe<%_SWp(}8>TRZ@txe?1 zO+-O_tfHSty1=~*VlKM;aEL zrFkqwxgP_2cF^*GD@w< z9;KqTY9B4pMyQ1!n6e5e&tTm=#2!3sEM?Clb?kZMRQ5cwj6IJmXU`)m@H~Q$^^&6z zj$3jx!SP6rW;lxEXb~%%Ua8FEhUeDp3~6un6@1FDl~rv z#WZ$SiCDjh&v`jTxS+7C6r^>&DL1cVlPix-sq(**s@kp>B-(xHjtNGjwBk zm>YXHwgLr;X)iJ*hG{SJuEV?tiI`wo+ecow`EfK9$XLb`>f# z?jzf%pV3fgc%U15p=A|<`Pd`Rot7mdPY6ep%_hQ`PH;y~H`QwgNym7~B z*J31D7R$0GT9{Q}dE)*hJ) z>XR=;?Zbl@Yh>p(picD%439cBzP(-T>A$_?=sfbXnf&y*&&BV`)$ObHw(s2QZ)x9p zZhLU2-`(Cn(7B_%dv*J^bt^;d9hY{jChqyGFO0#bqkC^hOM8rj+}_OGP;>jhx{e*~ zYnt14o!h>B@A}>C%DF2qId{$G_LaN4HV^IYAgP^w?y6u%x8E1sx?!*$e;3B;_wQb< z>|EpCo*d}fruYwpKm%`6T82Wa)(mcowj{P~jYj)o8-gp(U*p-PB%`73Xyf+Kz^d*2 z`}SFNEkwc|NZk|c-neVs{+39ucTEgwyVokvMe!%Lde`jg-O|$E=V{p;+`8t# z+5_!F;jLSSws%K&K)0=1_Vyw4)&m_o)@@$BfBk`W-?{_qhSqIfr-s^%|5X8wSW8ht9@(LFl%E76KR39fbR1STo^+R zqT9Bv848}?5L@eApTx0yo3d+9@ci}BzWpuxw{Klfu3r*d*>Xv5cQo1?+jwBDvVKqR z)(&W~W&c*CWp8L@%idmP-yn4JMt7k6c0t-;Z-3vO?fqMJg;s6ZzkN%~p1xJc*gv)n?0R6|26j_m zw}f^}Xt#uROK7)*c1vitgmz14w}f^}ShywhlAFZ-c~jWHN6OOvv1Ee=VyhDVG!l75 z{WOsn7JiyZxH12<5Gh$d9xS}VD1rU5WH;`SVbnJye!LLn_qA3!jTF zx}31GHQI!EfPA+Qv*uU~tdRm(xIYx7eme&Sc1FYA6;Z4?i(%R5ibQ`fj+Mm&5v(I! 
zf%nkh-?wW;Uo;YqB@-*KHgqr&CF}7AlPlP(0qHf%D|Uo~SVSGhH9D7wb;ejC706lv zrT0y177y+v^OJ^aX&u#-R9F!`)K8Qk1OOI7iFXo$W{r`cIh#+=tmP3j>x=}=FN_4u zFRc?a2e|~z>y`YyCt++Lc1ljTS8t-NM~j3pJguF25&|i zWa!7EPhJ&8;gm2irBL#*Jd|fKFg3KN@Gz!E|EuR~s3e7kWE_o`9{a%Zyj3`wA2))d zS#RWM&f#-38+aVexkirWB_l`kE9)H1)7X0~|Hc!aYRI0K<-J4`pd0WUQydLXBb%eK zU!<{Lqcy zj;Xa3{V%niQuvqJP0{~S`>-YahF@w2h5gc8D>=VxQd#|_x%1}!(!6-tFV)t(-GTd)3?YUpLZR73J20@-R*_e-@Rli%=Xzf>O|)kOWJI{4TxP1nbM zX}UiBFHPOYerdWsmLmOapy{f=G_3}jRs&5-{iUfiQ27m1as!p7{!*C@R9YjI)=1^3 zzf?{m`=xRkshmbCr;*B0f2o8fDxrx=Xrh`l(YQ@CZWE2$L}NA4bem|bW*V!R#%iXq znrWUmFjIS&eLam#L1se_Gnt3k#=}hEX<->-Sy$ZDCW@OTq_7+*EGvq7KdUzx z(^_HGS7EhJVHHVXDk+Uj7*j%F#ile3G?#>t!bJvg+z(HO$MZ zhL@GHmld9m^?p94ppO~Sr`-@#2_LH>K9+2Qn^j|$UIV*X8?;(Y%eJ9~nxIjk(HmLq zZ)DZ2i4}hnGeHw8j;3ZBubI7q)XZ+UX0=AKCZmO&dECq}9{iDe+~Z+2*Q2od?eVgD zi&AH`tx>IfO{yi`s(m#(H?!+rUnAAo=VN_IV*_hvP&L#h)azpv$?K(geyeb64>f>W z+i2w0Hu1T&%{*@HJR`UEYa_Sz8|&QKT|OSScAd{Mx8^tNB|I7zzLHJYt8=QfFD}(FodnMF5<{{6Y_9#t z{L)6oupw4qA2!!q>@U3wP54b-7=9F67z?m{1YHb5&s`sHbRH+?yOl@VtqS8Cgghi#+FtDg>m`3wu!llyMsB)<^ z7nXQcs(%q2E7dR$yGm2nXe82}s4ObA8{^`*Tq>1m;8JORRTh;>R{18Xt${zKaT{rP z16xW%sthT$e4b<4;;Eb|O{Iz2jZuFpLrNtxR}UieOV-0kF%Kit6xJFkY9(aWQ5ZAlWuD#^W*SD&H8Qr(<7UeQJT2t>t-@N} zLRhOUM%LFxz_x*15A55(ZVK#{@HlA+=`+9m^VWN{)h_c=l{uZ(vVU?INIIiN zXY=eIJ#9#*H0c!Paq%;blo=f*luUju=|lGWGuXzJ?AdNic^ zf*z~GZmO<_y*1`hZ*cZ9mq(?F7%AytucvA6&uJ7DBeOJ$irOblQL);`ib;Kg&CA#* zFN@$~*=tZWVg=#xk&5(T6=qqacYE(P`d3_6^$e@Zp$WLwg~%Wfu9AIZA^ao6n`kx>iL!GTVXmEAYp~8dfJH>~PCm zCA2}lsU6#InJZ(1d~;m3Uy<9&872kkit)FQkA4#L&3eoxcaQyF`Up|750i~u8k-0| zy1#51{z(4P5yU9BAG8vec952~C>&i#+B=MCV?(_J+MB5F!HnV9@a7k7V=ej67Rl0Q z<5);G1!Mh}kvUd=>0Q8m;%~iUI+RgAb8O^o2 zSt8AKp3lrt&D9Ud(G=MmVd`68R5$XuskaJ+_4rw!uzHOYRv(|j3h^kcu#v)g)ktCe z(K?0o;F-KHH+|?#%X{E89di^GPa~VcvMDT^!a80ktmU2tH|sm-5$%D<5$o&75&PWY za*wZtePWOtu?=o<)TADCuZVT9ehs_Esw;kIAsF8M^c+;9deeklK$5{i1` zsSZnBVYOUg6-QyoDl9#PiB_0QubU?8Wy*V5NH0^u%YyjWH6M!(l7)=I8q_+#A~dm( z=?~d8l0a`2693PuLE`T)lK4CMB>pZQi61eN_^%mB{6ASI@qfOGN8&%V$})-He9V#f 
zJdJD;-zM>G690H1@!4WPbrBg|P^NC(O_zhQO|{h?o-PMdH_WD2Tj;W~7J8iijw|an z-E2Lex?ojZF{$;>Z1pKy+@|%eOl*YE$|-nRfrcAaN=W~ANLU9Bv)W$WQpvYFr*+19G>`(7JE&8~J= zyV0z)(puWxC^S7iJ+m|0(=(aw*;fz{h(kO{AUy0~5ncjJSb}ks#109V#DIB&DX5~T zqJjyKgd{*9yc4SU?*DzKr@N#7yUer?6-s$@|=l{?Dpa1-i@7(LBY+6fr#-{mM zT0i3hxnbsfexDYfe=Sy4x5`(D1myZ0@Lk}6PiUb&G)v%SKtyEVUSDqm5(v0QJYK-> z69;Vh}k%g9PXRuX-Uz8|2iNjQWyszGpOYIR`oCR~K^5@SLijb86?LGw0~$9NnCwo0HC*bmpWpr_IXw z1&EwFk<&uwoVPjkI;URe9M7ENnUlcWEv=*wu`k!W3a2NQTrIX7O@wr4@asqM}u_LPGtD`yRSJT~Xm+;zJms!-bg&(Oq!vQ(y5hcZ*GN zBwap7)AvQEFFJkE={}#~b1d-~`S7WvzOTjJr@(!m3hMh>;eC#!?{hG5)bm=0Sh)OG_@; z=eTH@g)78)U*MftrmNp6wcvq*GAa>*jmp>+ykC;hSL z(5mEXJ%R^Fz0w-w$21%`k}s_|$m>vEvE&m=Z(`0sKbBs{>R-zV)Qs}g5`v5n+f6jz z67D*I)2Z{)U#${QPmWt6{u9X|kzORy*F^J4O90fF`jCq6RQ;yvH>HQ+rKN&3^rWc>f)St|;pQ*o0 zb|aIXWwgGSj-)3Sf4TU}#h=WqpG&WD$urk{lU4O|jc2a;pG%H1qL}739`M|aZyb&O zK=v}I(3}SqdE*s1;IfNBh4Uwf2eT@xwOIjvE7jK7-5U)eIPp$gj}_PQ1r@ffGpD|1U5P<-iatrq(e!M z>;gg_{T7|F17M5iW9mr|(>@2W(yE&8K}@|5VueDn_>Lv7n0k)HE5}*>Uyz7j&EFv5 zx`>F%^cCiucKLity8|(B9ON|xDc7SQQy3|C2=N*D z%@hM>^v;5e^Ek-p1Y;Q?U2%j!J<|iI zhtYv$$>beY*ne1|y$mY~n0$_B=+iDjos2GohJ4QFd_|0?^NPL{T8GkWs2}fFB#iKp z{Y#HS1+JlsPDA<^p^G!6uVJW&QF@AH(#8R?CfkoVPs50I4a+C_jkr!A{51O(Mxqm2 z8j5)r#?)st&+9SuIE=-g^bOO5{b}6@6J_(IUtywt68Z;WqTGKfImph3DeWHuS&l=> zbraLh^f$~nFGA@J*wlH-JCrMo<(>4(-iA5nV1i;oG+yLVvlAS~- zY~vSI6o6JZ9}pCqTrsRRPLZlCk^HqtsTah_re~4-ttg#_~rIyR&^vsA8O8PW7AiA(kDFHLg&1winUf z!@VLMG5vM8S;ik~QM^z7QJhd8V~t;&Qm^8aegvkCjUU87Ix^bPSn(a)GG5PVUl8Z( zw*u)3?LbmdlwKhniQ>Nn>bvH~V&qy=oe zkQcD)7*=a1m7PrkWeOyBs4MSR904+njuaERpg+Zg;gRuOs&y9bB(EpBFQMH_6Gi`; z2XHTq-!!A2l4hb${vM_U`+ulwTHb{$z@)pceK#mPeB#%pLEK zx;X+~SN;jhri}-tosAa~1UxEBf??+KF8`3x|H^XChb-s)d4)+Jm`}SOxdD?CmRqAE zm)`}q)%weooXiz3L3})~I3-t{kt>}9H`eGzM!@DdmS-LHoXa$9 zfk2;>7e`ZtBGflt{BAJohm;DUd-s%!E>ai&!H;G;nWG0L zLT9j;8L63^EyP&vOs@1t?%2WnXUlMqNTHjuh{(h#bl)yPNb>ICJ)5`@Rw_QJMATIT zl8W@;IU763Jjn+~Qp<)VfC5>kaE`c~1;``l5-edsJn~cYPy$#+<12t-+h`#6+Y0uK z(;xuR_7u`cY$pu4fFqz|1_w{mv>|lUB{*y33(BTHTzCZkg=gZ5IE6v8xdTU?OEFeb 
zF$jx7!V+1-BAO!2ST=YGc?~w0DuaXwNg*C`$Q&pbef=S?0*e@>jEa@w;5=5Kh4iV7 zVF;DCL4ZY;4~?9qjYbiq*kBejSZ9&4?g`+OPYh_=^W*LP= zcwnOegUbaIoIB57A!UfQnfD;8Zi4`)T~QOVBPJ$@f|?1xTlFG1dd3a{YEEIe&ZGxE zvYc*s`Md{is&#=WO>2Yjnic?ak|Q5wkz8tQ$F@O$Z^>bS6G<-yi$6yUMIHH@a8yl# zXxMpd80kXCu+qw>!+;qX36i~b3FejeK$|&qsFbkfBvmy53n0}GLrLEcsVrL2G}X+7 zx1ePnX-s1a$2i!}-Id8>e)e$@l$X}KTtba^!BsW}Stw9*fH`mg<< zr(fAS=;;^k83lU!@;!S7J>_3@pr=t3y$PVF#Too}w)p2<@lX9)apKsinWwJKEM7g4 z&zwIyQ(HWd@p$_3%!SIak6xWwo4FjV&CIW!c&T;qspsPQ$&aqh;PTvR?uQeM_9lbQS@?7)ciTuii)@;$* zGidGnrK=ZSj4z)Lo?Dwce`aAWJhnFT?0V(t;DxAuGQU25EML8N^7xIXgVR^)EA#D( z=g(f5JDD$Ec=6goedR>wd~gvxKetwI&0cw_HG3?M=h0g2g}gI={^-?<=bu}cJ&|3T zT|O1gKDl^f_Ih+}_PXwQKDzeIxwGNyjpLQsbM^Sl^6`ylPF|>7TyCyBbMDgeORWpb z^Ot6C%=l-X&zI(3Y_>sp<-+;Xp!vCl`KL}_s=XL5)K8wcem*!GpSkhT@XWbc^m077 zaPD;T!t&x3-V<#U_q^14I+(vQ7l4Cvv*6%`OZAm_ZEmH0r5^Z~>aFLNYbW!y3&9IF zo(@kp>p^k-%<9v@nRe~P7dGafn!R!%$UF6upyK@Lg_l}$;nB*|;Zd~ITyDIOS1=Z6 zt+{dWdcJb;#S@kJ^QW8VgZyg!dVIApzxwROlg%3!f)m%lL-+dWs}TKDv&Ua*6`C+J zbNtlI*^hkWr52_~Yopox;Nig;Rjqb5)|!oKwU|IloyJOg`a_4mY4EP&(1X_elS{aY z+v=;Wg{4K@_pQ^d+DZd|OgHR?>b&mgtk;%WwYg?-t+T$-z(d3IzIJV?=z-pa-Fvo6*?R42K~~?jE{%Gyw!XNrQV>xL*2^!y?0tIyIc@b@ z-__C9cWspQ{g!^~```Ml?|KzAUmtXTn&P(F13b8q1~x<8VeguG_X)>whN^iqKUhaG)xbj))i8kt~YD#cGYn> ze11$vSC4ao75^4 zVfN_Bt4H&hR$-|xW<1VkGpn;l7iLapGndZJT)cYb@=S2{*z(!a=Vp#wK7MZF^3kHp zZ#?@{?dZu9QSJP*Yfs?6ms(F;zkDiKJYBiiSv?N>aAW?=>NBu3usEmJE;RG@7<4)Y zo!&8^QwB_oJS?POdPey+i>p*raM%&XSjZw3;f4c>D7U1X1A{l80gZCKzVbfGM!15s zg($wVJIZe;rJ{t2vL8wxsBoz0+eQ9Hzmlw~X>`G#3fLLnDoSQb%~fp`C@Uyd30e`q zHj0oxB{NX=v~ZqbC!#a{Qk2d9l!{;+ionZmVO7QLGvN>nLP1F<14nH|K^f5=R~mq+ zj6ESjDP2bCBM_mCeN{)}NFX~=^e0e@tK~8M6RHx)1hvD5GM%idkSqGBR;CggGtgbM zC@Eta!&L_fB0ywry)&uU#f~yqR}z5g=`sNvEMaFr~j z({x=xsmT@>3kw50RC#-ssi;IT6M0G<2|5r{Ut9`=^`e;5NvNSgSgL7=(qA#tPsnc=M@&i# zqEpRksMOd+r&=q>nH0~VUd01XQVRw^suaHp8_2ra1C+b6hNx26N!pMuR@L-V1?YrH zmo8-0s7@&90mv$|j4GfA2chzh5~>N!g|HDSY3vMOHCj^Td6)(3;#iFqS6CvqT~sTd zqq3`psFw*M!BsHb1-IHj3J#Sj#YCYZcVu_cMs_i-I%&$Da=t77-G#ccP0ny3r!r=^ 
zlp5?JUQL=A=d@}%UEr(nAYfHS{<`>A6I{l`WyTdDV~sndC1NcCqW3bp+RzEjm2>`e zk+L@OLUySzAbgh!J<9ScBkO8&OoOTtB|ytooy>Sa_S8tk(S`=qGBZ3lniOy}twGJq zEEtZuUlkqXVG9kStc~=z_4h=;(_Ol&tOS(~EVmm6Np=8A0TqRJV(DSs8sIusO z&LMQC`{iZ1tv1~vpsV4Gvs)wCWI;@$03wH$w`6tEP8~8mS@s~ z_vQG=9qGaSvON?ejXo6-{4V}iWyLKVFy$LbpIpH=lKyBT3tk1wao&yxRzK2z;dr2) zq(i;}3TFDPdAuDT%<)9I)97}kUu{r9A&7KpZwl0%{dV!fELQ+8tbU{qt|aM-uv@rc z_9wgq;BWd(HwM#(`~q-d^k}aM>U|vdE&Q;alf1QFqdHFcX^lqNiS#Ctvn$=Yns7>- z52h*mbA|41B~^HC;THkFpnjqFYV--;gvucM)ix1dxJFEO@`G{?@24B&3#SHHEBn=s z5SRt>m8p`8a;-fl+z;Y1wzGvV)^F*J%13^tez)O_WoH27V7r>1+UbGq`{Yk}NffFm ze=S}xPx`HSxP?Dvdn#7Je335U=6t>7*x`?Hw0?>baX|<0$M!sq_8f4?I6*bz+IRtntoB^tSz#&`80$}^L5GV#iJ8xJamWk?5W4Dq+7p;twhIBW z9m*52t;r2Xo_D(-QK`23Ts_vk?!fJ6bdf87w!Ba6Z{d>F9@hillG&c{e?h460@lKw zca?J4Npz?CxgHf&ZQiHaxa?oB9hc1eWXFPtdWRjDqz6DjYe)O;fKS$McT^2u8|x6K z+k64sisPiMa$w)|AIlBLc{E;`@=+KOC~`nGn(|O!0K^LW=@!(5^OfD`!7uAR>5Clp zK(4wg%;HG;ggXU}S-+KPkgwc>U*>pk;g_kea$vE|MZcwAa!|2sa{OHVU*08PccfGE z5Qzu&FBcI=CXT24K`ghFkGwwMnC*VlqOJWN{IbqdFMwON@da$y+QZT00X5T+asqDI z=mBm*=a~~iq2A;GoB-Puj3x{?Ke3#%e-*`1%I1A&u+SqQgmhi)3oL_WpY6Kss7e?> zy-6p4ulBs+bfiCQeBl=8yl~A5q2VU5U*X^Y>*RgiBmGji5hh3AmC2`IN+5^*cJazC zVZ!+YY!0t$9l>-XUy8-xhOobsb|qAF1>7?GN$G!dp%fX{L%1t^PJ8|^ZP}0PJn+mW zccfg{j_fy*F1$}DEexNHH}K2uXyXBxquqz2wSyE4&%^O9#uvkF?E+40bO6U};{()% zehb$tco{bQvVHC2#x@5V2jpfcD37=Q|2p9>U&vl=|J>Z^=f3Rh8fH~D~AyTye5bz_T6L>c32iwAUpZ3wf z{WG~BPh|bV4D-4YsmK^v|3ELBHNR_hr7srVvZYfNTeIU z<3gYCllN=B0Qaolod1}9rgsn<`%{hpU>dfoeI3|Npx>Ogn6A8E+eEQE*zZL9J&{y0 z{s6(U@c}v3uR|kY8=L7#0tc#~6UzzjQ(ObQ7oUg0vg?XJkY+M^vFzw5oO0vEj+h2a%0;Z>CiqpxV>yw`=Jy0rEq`ApY#*>C;g`0 z0YA;>0IVwb0es*3!}cBHFBNJPnJ8XY+zFVG^^d%h$rIb{Y`lO;KMrX!|W}9LQyR02teO2RGaL0Zx(kYaHQLTYtH5UQl=A4{kP}Q@RakKjRl}0Q+&9 zw&fon-$TQJ_1QFveU_60DAqRit&SU;M!VF8;6{@rP>=A2eZu z8f4(kID3mfj9mOdK8rt0;V>T+SpY|8Lx~m0<&qq}8JJi;j>kwd@;D~fBG02s4s#6f zDBD2{Pnx@%V!Kf*Afx09&{zI}K!h#BioOW1Y_`|x#ZH&cE zIT8lkB*}ma#K*A)@)EoUc2Qs@*@M0zAK)EFr87d7X(2Z<4a=S>eT#yW? 
zJ1@D7F@V-8a{@#rTgFyEb_yFq-n)MwT4y_$9_$^^MK*{)U1!{|VDAUV5U5tR1IW70 zWJ)t64loS}Bk#c*74(Ntl8CUBQ*)acARJmf{GqTE8bLC!!GME_1(fW-BR0VygC?IC zI9HxQsnq%^fW<}_qB930Rw}&M7|Kg{nm6MQJ|4MEmcWWyrx;2X7RinV$j8`-1$d`~ zNf3XC47d_(r~3ym%shiXjCf?zsI-U|fu8)$s@CMd1ruU&SUc>ucB_&hbEr z&g6xFuK44v0s{SH2pH0OKMbio8iv#v1w-2Chavrsei+jK+&c{EW5FmG(l7dhketyl zB>$@qhBS(#F$`%8LmI=7?i3i3t$pKvzG~iSJ{HAT6h%ZZ)`RgFjP+nV4aTNmYzfDf zaBK<3mT+td$Chwx3CEW3*Uu8N;!uqE_^*c#j6ZcPex8sh_PI~VsT`3=I)~;h){7R#$PUHidPs^*BjXG->PEc z`214AOs!cZJJW}}2dayWRs&;E-DnrD3z%ER=5-DGPu+#_`PSa?dARgDM{)UJ>E%(3 z{I33r+5hV2>woW=uj$WDjjEV^a%!+*cA{D9bQ;A=Ai6JXSlBw%6~)%h{1N;1-M0H- z#j7{GaerruhIKh#>(r_XtLrPZ4oIJ_U#hiQjb^)w4g8(y;%yRojR4+3Fl1-{sp1_2 z&llJFUnJmq(kmPWUIhWQSwUmExKg`T9XPYpa%X(BTf5e9?|yjM`*g|kN?v*2<;FVb z^h%`z<-6vVYQ@a^-MyOiNa^=Xj8d(KO5ZjyT(|Bk{oaXw<@!JU%Jr*zSFT^%H%hsF zX5XN4J=tir8@;ME?@oM`DArE&IAKpOG*@e#Fev!gXf4?bJAeu*B#8HsTrs;>=-7Ir zxncLT8`VN|IH&iUP^frw1-lZ7Zjiu)N!23%EXUsWFkk$3)?Hx0Cr2Ez#s9P9)Gu{&OgN32E zx5rQ%l%cq<^!qT!w^qzYO5eVNrTExLOYt28mg3j?Eye%cyQTQH{i7_!&+XAtR7XK1 zej^qkvlSyL+BTgaJ!dVVv9aiR9naWUZ21A(jRjVg+hQ%yPGK$vLW23F`kN6nd~(9O zyYyIjqJqQw%M(5hA1F@*IDD`?5#sQ8c_PB$NO>a0;feA@fcQ&Co&v@@1%Xcu_iqoDzS6%+{hK|n zQV0TZ^BWHWZ^zj0-iAr%yN5{UbiZ`IXIJU`fj;T{!CvWn<7nx8?~c-WdX#j2re8Y$ z_Ew~Wo9`&;{GlPzInytlKfJ4S{z#v6{%Egso*6BjPwXh2XGclr-}Ou9-`|RKaPu7{ zolg#t&U5|J`MzDH^T+z6^Qm6xJU?1Guk0wDbEBm5v;ETfxm%GAZoZ?W^T&rs=Y@Xh zeE+V}`4fH8`IEiUd2zIK{?v}rIX_A|Ki@B%|8OhP!OeG+bpG@Z>0IoW&Y#&;IHLmS()p!+>HNoAkq&OYqoi|lh;-)qrSq3|mCg_LN#`&3N~bq^Pg`;I=K0clFnZlBAtbP>3nuq>HJWibYAO~&f;k4{P2#_xim^TzuYgK|8gtR z!OeG+bbe%rbo#zhaJclNyGrMCebV`SuXIL!;^0u}$99y?$PXaA?k^r78pY=k?kMT} z)gjX9dyDzu(vR;douBBF&R^@5&d67wA1eLz9i=n!1?jK*jp&dLUY~wPN#_egq|>*# z^Kj{J>?)m~?32z<^-5>t&d@`pFYYLvk^4qp_eN1j2fIpdyL84G!6GC0H;3p^ALQY1 z>2K|-M}NCdkG|BaMF@0-xxe2hxqq-Fxt;I`hf4o&2hoj2D;zAnZgc`<2Pnhsk=?zN z-ND-@yIX1T#|qi~qiwPqtnwZ%{o`GQ_fPtS_vK#UjV=)%D*e+Ph4--$1>}RJ*Ih7% z@K9ZTS}z*H>28*De9JU*Jr!w_u@FjS{@OJF9r_-ThX)v%CC5 z6W+~<*QP!_;T<@E&m_*QFBZV?@^AL;!UyZx<-@N`J?b4;zNfRa(x|pOjWzGS<$Lud 
zLHjCQ`JlHSs7$+g(tBI^fOj{3UcOo`emvln<{JyOjb^7^F1S0b)Z5DM@Vv`c zS6j{N_R!QTIQ4*cA5V2}FCH&@2Y_o|#mDc!Tf01UbLwsH$9eDb6Ty#6cn|e5=)PlD z#rv7d-#6iXFL?Uy0iNC|o({Y+Rr2;N-_>q37j`87z8>-mdWI1Gu5E-DRF0(l(9uWw z>HaPJXy0!I-I@1=$)i6p*)O>E(qfCBY19%;`36b(p<-i416b@@8}grPuGTMOZ^uTf zQ~pi5(zDZ}dA-`W)~Ih3h-tgEUA|X$6&pUpBer_qKQ0kvH&0nbfZy~g}vLv-K2)B-dd;P9%npzO7P1lQHKw?IRu z*6LK(pl&Pe^1k)P)!O=e`JO^8?c0>zV@Kb&r{N33J>ME--p@`1KgxJs%qI+n=Pj2E{0+u;)W$`kJ9y$2xl*gK75$;Uy-<-Uw{I-YgDOdU*?5pNK zF!+>#2!8pVTL43pzj#l1pLgKigZI7tkzWE3;pa=BrQ-6M9BvOF= z-Q%r#d&3Nz+?;&U`_PV^>-g`$6XD>9vH8bN4fHW$blaG5} zKcbO7IEQQE!(K0(b0p8p7-o(bE$sa49uh6K7zH%@7$cczxzW^mhbiM zCa%h?+yU<{;JsGM_Tx+Y`@t>4@m~d~>-+b2&E1#wy*l;9ecq4b@VR~7XZF1|b#q^j zx%>KFbNBAe$#?etCQVaikSuFPk_SxN*Y%jTcWq9Vdw#NoiSp;}?l)+kzWe6Xt9N@d zI~cV0Zca|`aM_LYg*}=se{=H49&dF`KQ>^b9^RaM_rNK4t7BxCW$9sL*k#v*V`MkG z(Zk4Yx4NbsBhGU4Fyci?;Snnp}y1xXt?|} zo0IPuc3x^z{z(6lI$|bV!<9`x?Udw-LteBb6I Lu+#h-A<_Q>sDKA$ diff --git a/llvm/lib/Analysis/models/inliner/saved_model.pbtxt b/llvm/lib/Analysis/models/inliner/saved_model.pbtxt new file mode 100644 index 0000000000000..ec522a8b7c353 --- /dev/null +++ b/llvm/lib/Analysis/models/inliner/saved_model.pbtxt @@ -0,0 +1,32634 @@ +saved_model_schema_version: 1 +meta_graphs { + meta_info_def { + stripped_op_list { + op { + name: "Const" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "value" + type: "tensor" + } + attr { + name: "dtype" + type: "type" + } + } + op { + name: "NoOp" + } + op { + name: "PartitionedCall" + input_arg { + name: "args" + type_list_attr: "Tin" + } + output_arg { + name: "output" + type_list_attr: "Tout" + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + } + attr { + name: "f" + type: "func" + } + attr { + name: "config" + type: "string" + default_value { + s: "" + } + } + attr { + name: "config_proto" + type: "string" + default_value { + s: "" + } + } + attr { + name: "executor_type" + type: 
"string" + default_value { + s: "" + } + } + } + op { + name: "Placeholder" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + default_value { + shape { + unknown_rank: true + } + } + } + } + op { + name: "ReadVariableOp" + input_arg { + name: "resource" + type: DT_RESOURCE + } + output_arg { + name: "value" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + is_stateful: true + } + op { + name: "StatefulPartitionedCall" + input_arg { + name: "args" + type_list_attr: "Tin" + } + output_arg { + name: "output" + type_list_attr: "Tout" + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + } + attr { + name: "f" + type: "func" + } + attr { + name: "config" + type: "string" + default_value { + s: "" + } + } + attr { + name: "config_proto" + type: "string" + default_value { + s: "" + } + } + attr { + name: "executor_type" + type: "string" + default_value { + s: "" + } + } + is_stateful: true + } + op { + name: "VarHandleOp" + output_arg { + name: "resource" + type: DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + } + attr { + name: "allowed_devices" + type: "list(string)" + default_value { + list { + } + } + } + is_stateful: true + } + } + tags: "serve" + tensorflow_version: "1.15.0" + tensorflow_git_version: "unknown" + stripped_default_attrs: true + } + graph_def { + node { + name: "train_step" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + } + } + } + attr { + key: "shared_name" + value { + s: "train_step" + } 
+ } + } + node { + name: "train_step/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "train_step" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense/kernel" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/EncodingNetwork/dense/kernel" + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense/kernel/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense/kernel" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense/bias" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 100 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/EncodingNetwork/dense/bias" + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense/bias/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense/bias" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense_1/kernel" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 100 + } + dim { + size: 40 + } 
+ } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/EncodingNetwork/dense_1/kernel" + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense_1/kernel/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense_1/kernel" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense_1/bias" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 40 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/EncodingNetwork/dense_1/bias" + } + } + } + node { + name: "QNetwork/EncodingNetwork/dense_1/bias/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense_1/bias" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/dense_2/kernel" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/dense_2/kernel" + } + } + } + node { + name: "QNetwork/dense_2/kernel/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/dense_2/kernel" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "QNetwork/dense_2/bias" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 2 + } + } + } + } + attr { + key: "shared_name" + value { + s: "QNetwork/dense_2/bias" + } + } + } + node { + name: "QNetwork/dense_2/bias/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "QNetwork/dense_2/bias" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "NoOp" + op: "NoOp" + } + node { + name: "Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "\nu\n\023\010\001\022\017_time_step_spec\n\024\010\002\022\020_trajectory_spec\n\023\010\003\022\017_wrapped_policy\n\016\010\004\022\ntrain_step\n\023\010\005\022\017model_variables\n\016\010\006\022\nsignatures\n\030\n\017\010\007\022\013observation\n\005\010\007\022\0013\n\030\n\017\010\007\022\013observation\n\005\010\007\022\0011\n;\n\016\010\010\022\n_q_network\n\023\010\001\022\017_time_step_spec\n\024\010\t\022\020_trajectory_spec\nE\022C\n\016VARIABLE_VALUE\022\ntrain_step\032%train_step/.ATTRIBUTES/VARIABLE_VALUE\n*\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\005\010\016\022\0014\n\005\010\017\022\0015\n\000\n\000\n\214\001\n\026\010\020\022\022_input_tensor_spec\n\014\010\021\022\010_encoder\n\022\010\022\022\016_q_value_layer\n\r\010\023\022\tvariables\n\031\010\024\022\025regularization_losses\n\027\010\025\022\023trainable_variables\n\r\010\026\022\tkeras_api\n\030\n\017\010\007\022\013observation\n\005\010\007\022\0011\ng\022e\n\016VARIABLE_VALUE\022%QNetwork/EncodingNetwork/dense/kernel\032,model_variables/0/.ATTRIBUTES/VARIABLE_VALUE\ne\022c\n\016VARIABLE_VALUE\022#QNetwork/EncodingNetwork
/dense/bias\032,model_variables/1/.ATTRIBUTES/VARIABLE_VALUE\ni\022g\n\016VARIABLE_VALUE\022\'QNetwork/EncodingNetwork/dense_1/kernel\032,model_variables/2/.ATTRIBUTES/VARIABLE_VALUE\ng\022e\n\016VARIABLE_VALUE\022%QNetwork/EncodingNetwork/dense_1/bias\032,model_variables/3/.ATTRIBUTES/VARIABLE_VALUE\nY\022W\n\016VARIABLE_VALUE\022\027QNetwork/dense_2/kernel\032,model_variables/4/.ATTRIBUTES/VARIABLE_VALUE\nW\022U\n\016VARIABLE_VALUE\022\025QNetwork/dense_2/bias\032,model_variables/5/.ATTRIBUTES/VARIABLE_VALUE\n\000\n\334\001\n\026\010\027\022\022_input_tensor_spec\n\027\010\030\022\023_preprocessing_nest\n\036\010\031\022\032_flat_preprocessing_layers\n\033\010\032\022\027_preprocessing_combiner\n\032\010\033\022\026_postprocessing_layers\n\r\010\034\022\tvariables\n\031\010\035\022\025regularization_losses\n\027\010\036\022\023trainable_variables\n\r\010\037\022\tkeras_api\nh\n\n\010\016\022\006kernel\n\010\010\017\022\004bias\n\r\010 \022\tvariables\n\031\010!\022\025regularization_losses\n\027\010\"\022\023trainable_variables\n\r\010#\022\tkeras_api\n*\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\005\010\016\022\0014\n\005\010\017\022\0015\n\000\n*\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\005\010\016\022\0014\n\005\010\017\022\0015\n\255\001\n\021\010$\022\rlayer_metrics\n\r\010\023\022\tvariables\n\037\010%\022\033layer_regularization_losses\n\013\010&\022\007metrics\n\n\010\'\022\006layers\n\031\010\024\022\025regularization_losses\n\033\010(\022\027non_trainable_variables\n\027\010\025\022\023trainable_variables\n\000\n\000\nV\n\005\010)\022\0010\n\005\010*\022\0011\n\005\010+\022\0012\n\005\010,\022\0013\n\005\010-\022\0014\n\005\010.\022\0015\n\005\010/\022\0016\n\005\0100\022\0017\n\005\0101\022\0018\n\005\0102\022\0019\n\006\0103\022\00210\n\006\0104\022\00211\nR\n\r\0105\022\tvariables\n\031\0106\022\025regularization_losses\n\027\0107\022\023trainable_variables
\n\r\0108\022\tkeras_api\n\025\n\005\0109\022\0010\n\005\010:\022\0011\n\005\010;\022\0012\n\034\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\000\n\034\n\005\010\n\022\0010\n\005\010\013\022\0011\n\005\010\014\022\0012\n\005\010\r\022\0013\n\255\001\n\021\010<\022\rlayer_metrics\n\r\010\034\022\tvariables\n\037\010=\022\033layer_regularization_losses\n\013\010>\022\007metrics\n\n\010?\022\006layers\n\031\010\035\022\025regularization_losses\n\033\010@\022\027non_trainable_variables\n\027\010\036\022\023trainable_variables\n\016\n\005\010\016\022\0010\n\005\010\017\022\0011\n\000\n\016\n\005\010\016\022\0010\n\005\010\017\022\0011\n\255\001\n\021\010A\022\rlayer_metrics\n\r\010 \022\tvariables\n\037\010B\022\033layer_regularization_losses\n\013\010C\022\007metrics\n\n\010D\022\006layers\n\031\010!\022\025regularization_losses\n\033\010E\022\027non_trainable_variables\n\027\010\"\022\023trainable_variables\n\000\n\000\n\000\n\016\n\005\010\021\022\0010\n\005\010\022\022\0011\n\000\nR\n\r\010F\022\tvariables\n\031\010G\022\025regularization_losses\n\027\010H\022\023trainable_variables\n\r\010I\022\tkeras_api\nR\n\r\010J\022\tvariables\n\031\010K\022\025regularization_losses\n\027\010L\022\023trainable_variables\n\r\010M\022\tkeras_api\nR\n\r\010N\022\tvariables\n\031\010O\022\025regularization_losses\n\027\010P\022\023trainable_variables\n\r\010Q\022\tkeras_api\nR\n\r\010R\022\tvariables\n\031\010S\022\025regularization_losses\n\027\010T\022\023trainable_variables\n\r\010U\022\tkeras_api\nR\n\r\010V\022\tvariables\n\031\010W\022\025regularization_losses\n\027\010X\022\023trainable_variables\n\r\010Y\022\tkeras_api\nR\n\r\010Z\022\tvariables\n\031\010[\022\025regularization_losses\n\027\010\\\022\023trainable_variables\n\r\010]\022\tkeras_api\nR\n\r\010^\022\tvariables\n\031\010_\022\025regularization_losses\n\027\010`\022\023trainable_variables\n\r\010a\022\tkeras_api\nR\n\r\010b\022\tvariables\n\031\010c\022\025regularizati
on_losses\n\027\010d\022\023trainable_variables\n\r\010e\022\tkeras_api\nR\n\r\010f\022\tvariables\n\031\010g\022\025regularization_losses\n\027\010h\022\023trainable_variables\n\r\010i\022\tkeras_api\nR\n\r\010j\022\tvariables\n\031\010k\022\025regularization_losses\n\027\010l\022\023trainable_variables\n\r\010m\022\tkeras_api\nR\n\r\010n\022\tvariables\n\031\010o\022\025regularization_losses\n\027\010p\022\023trainable_variables\n\r\010q\022\tkeras_api\nR\n\r\010r\022\tvariables\n\031\010s\022\025regularization_losses\n\027\010t\022\023trainable_variables\n\r\010u\022\tkeras_api\n\000\n\000\n\000\n\255\001\n\021\010v\022\rlayer_metrics\n\r\0105\022\tvariables\n\037\010w\022\033layer_regularization_losses\n\013\010x\022\007metrics\n\n\010y\022\006layers\n\031\0106\022\025regularization_losses\n\033\010z\022\027non_trainable_variables\n\027\0107\022\023trainable_variables\nR\n\r\010{\022\tvariables\n\031\010|\022\025regularization_losses\n\027\010}\022\023trainable_variables\n\r\010~\022\tkeras_api\nk\n\n\010\n\022\006kernel\n\010\010\013\022\004bias\n\r\010\177\022\tvariables\n\032\010\200\001\022\025regularization_losses\n\030\010\201\001\022\023trainable_variables\n\016\010\202\001\022\tkeras_api\nl\n\n\010\014\022\006kernel\n\010\010\r\022\004bias\n\016\010\203\001\022\tvariables\n\032\010\204\001\022\025regularization_losses\n\030\010\205\001\022\023trainable_variables\n\016\010\206\001\022\tkeras_api\n\000\n\000\n\000\nv\n\005\010)\022\0010\n\005\010*\022\0011\n\005\010+\022\0012\n\005\010,\022\0013\n\005\010-\022\0014\n\005\010.\022\0015\n\005\010/\022\0016\n\005\0100\022\0017\n\005\0101\022\0018\n\005\0102\022\0019\n\006\0103\022\00210\n\006\0104\022\00211\n\006\010\032\022\00212\n\006\0109\022\00213\n\006\010:\022\00214\n\006\010;\022\00215\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\262\001\n\022\010\207\001\022\rlayer_metrics\n\r\010F\022\tvariables\n 
\010\210\001\022\033layer_regularization_losses\n\014\010\211\001\022\007metrics\n\013\010\212\001\022\006layers\n\031\010G\022\025regularization_losses\n\034\010\213\001\022\027non_trainable_variables\n\027\010H\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\214\001\022\rlayer_metrics\n\r\010J\022\tvariables\n \010\215\001\022\033layer_regularization_losses\n\014\010\216\001\022\007metrics\n\013\010\217\001\022\006layers\n\031\010K\022\025regularization_losses\n\034\010\220\001\022\027non_trainable_variables\n\027\010L\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\221\001\022\rlayer_metrics\n\r\010N\022\tvariables\n \010\222\001\022\033layer_regularization_losses\n\014\010\223\001\022\007metrics\n\013\010\224\001\022\006layers\n\031\010O\022\025regularization_losses\n\034\010\225\001\022\027non_trainable_variables\n\027\010P\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\226\001\022\rlayer_metrics\n\r\010R\022\tvariables\n \010\227\001\022\033layer_regularization_losses\n\014\010\230\001\022\007metrics\n\013\010\231\001\022\006layers\n\031\010S\022\025regularization_losses\n\034\010\232\001\022\027non_trainable_variables\n\027\010T\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\233\001\022\rlayer_metrics\n\r\010V\022\tvariables\n \010\234\001\022\033layer_regularization_losses\n\014\010\235\001\022\007metrics\n\013\010\236\001\022\006layers\n\031\010W\022\025regularization_losses\n\034\010\237\001\022\027non_trainable_variables\n\027\010X\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\240\001\022\rlayer_metrics\n\r\010Z\022\tvariables\n \010\241\001\022\033layer_regularization_losses\n\014\010\242\001\022\007metrics\n\013\010\243\001\022\006layers\n\031\010[\022\025regularization_losses\n\034\010\244\001\022\027non_trainable_variables\n\027\010\\\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\245\001\022\rlayer_metrics\n\r\010^\022\tvariables\n 
\010\246\001\022\033layer_regularization_losses\n\014\010\247\001\022\007metrics\n\013\010\250\001\022\006layers\n\031\010_\022\025regularization_losses\n\034\010\251\001\022\027non_trainable_variables\n\027\010`\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\252\001\022\rlayer_metrics\n\r\010b\022\tvariables\n \010\253\001\022\033layer_regularization_losses\n\014\010\254\001\022\007metrics\n\013\010\255\001\022\006layers\n\031\010c\022\025regularization_losses\n\034\010\256\001\022\027non_trainable_variables\n\027\010d\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\257\001\022\rlayer_metrics\n\r\010f\022\tvariables\n \010\260\001\022\033layer_regularization_losses\n\014\010\261\001\022\007metrics\n\013\010\262\001\022\006layers\n\031\010g\022\025regularization_losses\n\034\010\263\001\022\027non_trainable_variables\n\027\010h\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\264\001\022\rlayer_metrics\n\r\010j\022\tvariables\n \010\265\001\022\033layer_regularization_losses\n\014\010\266\001\022\007metrics\n\013\010\267\001\022\006layers\n\031\010k\022\025regularization_losses\n\034\010\270\001\022\027non_trainable_variables\n\027\010l\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\271\001\022\rlayer_metrics\n\r\010n\022\tvariables\n \010\272\001\022\033layer_regularization_losses\n\014\010\273\001\022\007metrics\n\013\010\274\001\022\006layers\n\031\010o\022\025regularization_losses\n\034\010\275\001\022\027non_trainable_variables\n\027\010p\022\023trainable_variables\n\000\n\000\n\000\n\262\001\n\022\010\276\001\022\rlayer_metrics\n\r\010r\022\tvariables\n 
\010\277\001\022\033layer_regularization_losses\n\014\010\300\001\022\007metrics\n\013\010\301\001\022\006layers\n\031\010s\022\025regularization_losses\n\034\010\302\001\022\027non_trainable_variables\n\027\010t\022\023trainable_variables\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\262\001\n\022\010\303\001\022\rlayer_metrics\n\r\010{\022\tvariables\n \010\304\001\022\033layer_regularization_losses\n\014\010\305\001\022\007metrics\n\013\010\306\001\022\006layers\n\031\010|\022\025regularization_losses\n\034\010\307\001\022\027non_trainable_variables\n\027\010}\022\023trainable_variables\n\016\n\005\010\n\022\0010\n\005\010\013\022\0011\n\000\n\016\n\005\010\n\022\0010\n\005\010\013\022\0011\n\264\001\n\022\010\310\001\022\rlayer_metrics\n\r\010\177\022\tvariables\n \010\311\001\022\033layer_regularization_losses\n\014\010\312\001\022\007metrics\n\013\010\313\001\022\006layers\n\032\010\200\001\022\025regularization_losses\n\034\010\314\001\022\027non_trainable_variables\n\030\010\201\001\022\023trainable_variables\n\016\n\005\010\014\022\0010\n\005\010\r\022\0011\n\000\n\016\n\005\010\014\022\0010\n\005\010\r\022\0011\n\265\001\n\022\010\315\001\022\rlayer_metrics\n\016\010\203\001\022\tvariables\n \010\316\001\022\033layer_regularization_losses\n\014\010\317\001\022\007metrics\n\013\010\320\001\022\006layers\n\032\010\204\001\022\025regularization_losses\n\034\010\321\001\022\027non_trainable_variables\n\030\010\205\001\022\023trainable_variables\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000" + } + } + } + } + node { + name: "action_callee_basic_block_count" + 
op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_callee_conditionally_executed_blocks" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_callee_users" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_caller_basic_block_count" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_caller_conditionally_executed_blocks" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_caller_users" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_callsite_height" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_cost_estimate" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_discount" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_edge_count" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_inlining_default" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_node_count" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_nr_ctant_params" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_reward" + op: "Placeholder" + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "action_step_type" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + } + node { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "action_callee_basic_block_count" + input: "action_callee_conditionally_executed_blocks" + input: "action_callee_users" + input: "action_caller_basic_block_count" + input: "action_caller_conditionally_executed_blocks" + input: "action_caller_users" + input: "action_callsite_height" + input: "action_cost_estimate" + input: "action_discount" + input: "action_edge_count" + input: "action_inlining_default" + input: "action_node_count" + input: "action_nr_ctant_params" + input: "action_reward" + input: "action_step_type" + input: "QNetwork/EncodingNetwork/dense/kernel" + input: "QNetwork/EncodingNetwork/dense/bias" + input: "QNetwork/EncodingNetwork/dense_1/kernel" + input: "QNetwork/EncodingNetwork/dense_1/bias" + input: "QNetwork/dense_2/kernel" + input: "QNetwork/dense_2/bias" + attr { + key: "Tin" + value { + list { + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_FLOAT + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + 
value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_signature_wrapper_4619026" + } + } + } + } + node { + name: "PartitionedCall" + op: "PartitionedCall" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_signature_wrapper_4619033" + } + } + } + } + node { + name: "StatefulPartitionedCall_1" + op: "StatefulPartitionedCall" + input: "train_step" + attr { + key: "Tin" + value { + list { + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 0 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_signature_wrapper_4619048" + } + } + } + } + node { + name: "saver_filename" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "shape" + value { + shape { + } + } + } + } + node { + name: "StatefulPartitionedCall_2" + op: "StatefulPartitionedCall" + input: 
"saver_filename" + input: "train_step/Read/ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense/kernel/Read/ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense/bias/Read/ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense_1/kernel/Read/ReadVariableOp" + input: "QNetwork/EncodingNetwork/dense_1/bias/Read/ReadVariableOp" + input: "QNetwork/dense_2/kernel/Read/ReadVariableOp" + input: "QNetwork/dense_2/bias/Read/ReadVariableOp" + input: "Const" + attr { + key: "Tin" + value { + list { + type: DT_STRING + type: DT_INT64 + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_STRING + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference__traced_save_4619143" + } + } + } + } + node { + name: "StatefulPartitionedCall_3" + op: "StatefulPartitionedCall" + input: "saver_filename" + input: "train_step" + input: "QNetwork/EncodingNetwork/dense/kernel" + input: "QNetwork/EncodingNetwork/dense/bias" + input: "QNetwork/EncodingNetwork/dense_1/kernel" + input: "QNetwork/EncodingNetwork/dense_1/bias" + input: "QNetwork/dense_2/kernel" + input: "QNetwork/dense_2/bias" + attr { + key: "Tin" + value { + list { + type: DT_STRING + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } 
+ } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference__traced_restore_4619176" + } + } + } + } + library { + function { + signature { + name: "__inference_signature_wrapper_4619048" + input_arg { + name: "unknown" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "unknown" + attr { + key: "Tin" + value { + list { + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 0 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_function_with_signature_4619040" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: 
"_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_function_with_signature_4619029" + } + node_def { + name: "PartitionedCall" + op: "PartitionedCall" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_function_722" + } + } + } + experimental_debug_info { + original_node_names: "PartitionedCall" + } + } + attr { + key: "_input_shapes" + value { + } + } + } + function { + signature { + name: "__inference_action_931" + input_arg { + name: "time_step" + type: DT_INT32 + } + input_arg { + name: "time_step_1" + type: DT_FLOAT + } + input_arg { + name: "time_step_2" + type: DT_FLOAT + } + input_arg { + name: "time_step_3" + type: DT_INT64 + } + input_arg { + name: "time_step_4" + type: DT_INT64 + } + input_arg { + name: "time_step_5" + type: DT_INT64 + } + input_arg { + name: "time_step_6" + type: DT_INT64 + } + input_arg { + name: "time_step_7" + type: DT_INT64 + } + input_arg { + name: "time_step_8" + type: DT_INT64 + } + input_arg { + name: "time_step_9" + type: DT_INT64 + } + input_arg { + name: "time_step_10" + type: DT_INT64 + } + input_arg { + name: "time_step_11" + type: DT_INT64 + } + input_arg { + name: "time_step_12" + type: DT_INT64 + } + input_arg { + name: "time_step_13" + type: DT_INT64 + } + input_arg { + name: "time_step_14" + type: DT_INT64 + } + input_arg { + name: "qnetwork_encodingnetwork_dense_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_encodingnetwork_dense_biasadd_readvariableop_resource" + 
type: DT_RESOURCE + } + input_arg { + name: "qnetwork_encodingnetwork_dense_1_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_encodingnetwork_dense_1_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_dense_2_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "qnetwork_dense_2_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_3" + input: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + 
f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 
+ f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 
1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + 
f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 11 + f: 12 + f: 13 + f: 14 + f: 14 + f: 14 + f: 16 + f: 17 + f: 19 + f: 23 + f: 27 + f: 39 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/truediv" + op: "RealDiv" + input: 
"QNetwork/EncodingNetwork/lambda/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda/concat/axis:output:0" + attr { + key: "N" + 
value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_4" + input: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_1/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 
0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + 
f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 
+ f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 
2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 10 + f: 10 + f: 10 + f: 12 + f: 12 + f: 12 + f: 14 + f: 14 + f: 18 + f: 20 + f: 23 + f: 30 + f: 41 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_1/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_1/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_1/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + 
shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_1/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_1/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_1/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_1/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_1/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_1/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_1/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_1/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + 
} + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_1/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_5" + input: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_2/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 
+ f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 
3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + 
f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 22 + f: 22 + f: 22 + f: 22 + f: 23 + f: 23 + f: 23 + f: 24 + f: 24 + f: 24 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 26 + f: 26 + f: 26 + f: 27 + f: 27 + f: 27 + f: 27 + f: 28 + f: 28 + f: 29 + f: 29 + f: 29 + f: 29 + f: 30 + f: 30 + f: 31 + f: 31 + f: 31 + f: 31 + f: 32 + f: 32 + f: 33 + f: 33 + f: 33 + f: 34 + f: 34 + f: 34 + f: 34 + f: 35 + f: 35 + f: 36 + f: 36 + f: 37 + f: 37 + f: 37 + f: 38 + f: 38 + f: 39 + f: 39 + f: 40 + f: 40 + f: 41 + f: 41 + f: 41 + f: 42 + f: 43 + f: 43 + f: 44 + f: 44 + f: 45 + f: 45 + f: 46 + f: 46 + f: 46 + f: 47 + f: 47 + f: 48 + f: 49 + f: 49 + f: 50 + f: 50 + f: 51 + f: 52 + f: 53 + f: 53 + f: 54 + f: 55 + f: 56 + f: 57 + f: 57 + f: 58 + f: 59 + f: 60 + f: 61 + f: 61 + f: 63 + f: 63 + f: 64 + f: 65 + f: 66 + f: 67 + f: 67 + f: 69 + f: 70 + f: 71 + f: 72 + f: 73 + f: 74 + f: 75 + f: 77 + f: 78 + f: 79 + f: 80 + f: 81 + f: 82 + f: 83 + f: 85 + f: 86 + f: 88 + f: 89 + f: 91 + f: 92 + f: 94 + f: 96 + f: 97 + f: 99 + f: 100 + f: 101 + f: 103 + f: 
105 + f: 107 + f: 109 + f: 111 + f: 113 + f: 115 + f: 118 + f: 121 + f: 123 + f: 126 + f: 128 + f: 130 + f: 133 + f: 135 + f: 137 + f: 140 + f: 143 + f: 146 + f: 148 + f: 151 + f: 154 + f: 157 + f: 161 + f: 163 + f: 166 + f: 169 + f: 173 + f: 178 + f: 183 + f: 189 + f: 193 + f: 197 + f: 202 + f: 208 + f: 213 + f: 218 + f: 223 + f: 228 + f: 233 + f: 239 + f: 245 + f: 250 + f: 257 + f: 262 + f: 269 + f: 277 + f: 284 + f: 292 + f: 300 + f: 308 + f: 319 + f: 329 + f: 340 + f: 349 + f: 359 + f: 371 + f: 382 + f: 394 + f: 410 + f: 423 + f: 435 + f: 445 + f: 462 + f: 480 + f: 492 + f: 506 + f: 519 + f: 536 + f: 557 + f: 577 + f: 598 + f: 622 + f: 655 + f: 679 + f: 707 + f: 733 + f: 751 + f: 787 + f: 814 + f: 847 + f: 897 + f: 934 + f: 997 + f: 1062 + f: 1111 + f: 1181 + f: 1275 + f: 1385 + f: 1465 + f: 1603 + f: 1769 + f: 2057 + f: 2257 + f: 2803 + f: 3468 + f: 4417 + f: 6538 + f: 16126 + f: 23446 + f: 33536 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_2/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/truediv/y" + } + } + node_def { + name: 
"QNetwork/EncodingNetwork/lambda_2/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_2/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_2/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_2/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_2/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_2/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_2/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_2/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_2/Sqrt:y:0" + input: 
"QNetwork/EncodingNetwork/lambda_2/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_2/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_2/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_6" + input: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_3/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + 
f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 
+ f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 
4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 23 + f: 23 + f: 
23 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 26 + f: 26 + f: 26 + f: 26 + f: 27 + f: 27 + f: 27 + f: 27 + f: 27 + f: 28 + f: 28 + f: 28 + f: 29 + f: 29 + f: 29 + f: 29 + f: 30 + f: 30 + f: 30 + f: 31 + f: 31 + f: 31 + f: 32 + f: 32 + f: 32 + f: 33 + f: 33 + f: 33 + f: 34 + f: 34 + f: 34 + f: 34 + f: 35 + f: 35 + f: 35 + f: 36 + f: 36 + f: 36 + f: 37 + f: 37 + f: 37 + f: 38 + f: 38 + f: 38 + f: 38 + f: 39 + f: 39 + f: 40 + f: 40 + f: 41 + f: 41 + f: 42 + f: 43 + f: 43 + f: 44 + f: 45 + f: 45 + f: 46 + f: 47 + f: 47 + f: 48 + f: 49 + f: 49 + f: 50 + f: 50 + f: 52 + f: 52 + f: 53 + f: 54 + f: 55 + f: 55 + f: 57 + f: 58 + f: 59 + f: 60 + f: 62 + f: 64 + f: 65 + f: 66 + f: 68 + f: 70 + f: 70 + f: 70 + f: 70 + f: 70 + f: 71 + f: 73 + f: 75 + f: 76 + f: 78 + f: 81 + f: 84 + f: 86 + f: 90 + f: 94 + f: 98 + f: 101 + f: 106 + f: 111 + f: 117 + f: 123 + f: 130 + f: 138 + f: 146 + f: 157 + f: 163 + f: 176 + f: 187 + f: 198 + f: 214 + f: 227 + f: 252 + f: 280 + f: 327 + f: 395 + f: 506 + f: 671 + f: 1025 + f: 1971 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_3/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + 
experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_3/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_3/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_3/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_3/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_3/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_3/concat" + op: "ConcatV2" + input: 
"QNetwork/EncodingNetwork/lambda_3/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_3/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_3/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_3/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_3/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_7" + input: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_4/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 
0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + 
f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 
+ f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 11 + f: 11 + f: 11 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 13 + f: 13 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 
+ f: 19 + f: 19 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 21 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 25 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 26 + f: 27 + f: 28 + f: 28 + f: 28 + f: 28 + f: 28 + f: 29 + f: 30 + f: 30 + f: 30 + f: 30 + f: 30 + f: 30 + f: 31 + f: 32 + f: 32 + f: 32 + f: 32 + f: 32 + f: 34 + f: 34 + f: 34 + f: 34 + f: 34 + f: 34 + f: 35 + f: 36 + f: 36 + f: 36 + f: 37 + f: 38 + f: 38 + f: 38 + f: 39 + f: 40 + f: 40 + f: 41 + f: 42 + f: 42 + f: 43 + f: 44 + f: 44 + f: 46 + f: 46 + f: 47 + f: 48 + f: 48 + f: 50 + f: 50 + f: 52 + f: 52 + f: 54 + f: 55 + f: 55 + f: 56 + f: 57 + f: 58 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 62 + f: 62 + f: 64 + f: 65 + f: 66 + f: 68 + f: 70 + f: 72 + f: 74 + f: 77 + f: 80 + f: 82 + f: 86 + f: 89 + f: 92 + f: 96 + f: 99 + f: 104 + f: 108 + f: 114 + f: 119 + f: 125 + f: 131 + f: 139 + f: 146 + f: 157 + f: 167 + f: 176 + f: 188 + f: 198 + f: 215 + f: 236 + f: 262 + f: 306 + f: 376 + f: 462 + f: 596 + f: 942 + f: 1428 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_4/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + 
tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_4/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_4/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_4/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_4/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_4/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_4/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/concat/axis" + } + } + node_def { + name: 
"QNetwork/EncodingNetwork/lambda_4/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_4/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_4/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_4/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_4/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_4/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_8" + input: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_5/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 
1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + 
f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 
+ f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 
4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 11 + f: 11 + f: 12 + f: 13 + f: 14 + f: 15 + f: 16 + f: 18 + f: 20 + f: 23 + f: 29 + f: 38 + f: 60 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_5/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/truediv/y" + 
} + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_5/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_5/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_5/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_5/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_5/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_5/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_5/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_5/Sqrt:y:0" + input: 
"QNetwork/EncodingNetwork/lambda_5/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_5/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_5/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_9" + input: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_6/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + 
f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 3 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 
+ f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 4 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 6 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 7 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 
8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 8 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 9 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 11 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 12 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 13 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 14 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 
15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 16 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 17 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 18 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 19 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 21 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 22 + f: 23 + f: 23 + f: 23 + f: 23 + f: 23 + f: 23 + f: 23 + f: 24 + f: 24 + f: 24 + f: 24 + f: 24 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 26 + f: 26 + f: 26 + f: 26 + f: 27 + f: 27 + f: 27 + f: 28 + f: 28 + f: 28 + f: 29 + f: 29 + f: 30 + f: 30 + f: 30 + f: 31 + f: 31 + f: 32 + f: 32 + f: 33 + f: 33 + f: 34 + f: 35 + f: 37 + f: 38 + f: 40 + f: 46 + f: 51 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_6/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } 
+ } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_6/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_6/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_6/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_6/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_6/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_6/concat" + op: "ConcatV2" + input: 
"QNetwork/EncodingNetwork/lambda_6/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_6/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_6/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_6/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_6/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_10" + input: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_7/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: -15035 + f: -15030 + f: -15025 + f: -15000 + f: -14985 + f: -14945 + f: -14745 + f: -70 + f: -55 
+ f: -55 + f: -50 + f: -50 + f: -50 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -45 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -40 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + 
f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -35 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + 
f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -30 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -25 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -20 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -15 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -10 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: -5 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 
0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 5 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 10 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 15 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 20 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 25 + f: 30 + f: 30 + f: 30 + f: 30 + f: 30 + f: 30 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 35 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 40 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 45 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 50 + f: 55 + f: 55 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 60 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 65 + f: 70 + f: 70 + f: 70 + f: 70 + f: 70 + f: 70 + f: 70 + f: 75 + f: 75 + f: 80 + f: 80 + f: 80 + f: 85 + 
f: 85 + f: 85 + f: 90 + f: 90 + f: 90 + f: 90 + f: 95 + f: 95 + f: 100 + f: 100 + f: 105 + f: 110 + f: 115 + f: 120 + f: 125 + f: 125 + f: 130 + f: 140 + f: 140 + f: 145 + f: 150 + f: 155 + f: 160 + f: 160 + f: 165 + f: 170 + f: 175 + f: 180 + f: 190 + f: 200 + f: 210 + f: 215 + f: 220 + f: 220 + f: 230 + f: 235 + f: 245 + f: 250 + f: 260 + f: 275 + f: 290 + f: 305 + f: 325 + f: 350 + f: 370 + f: 390 + f: 425 + f: 460 + f: 500 + f: 560 + f: 650 + f: 790 + f: 1025 + f: 1600 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_7/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_7/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_7/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/truediv" + } + } + node_def { 
+ name: "QNetwork/EncodingNetwork/lambda_7/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_7/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_7/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_7/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_7/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_7/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_7/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_7/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_7/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_7/concat" + } + } + node_def { + name: 
"QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_11" + input: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_8/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 18 + f: 29 + f: 39 + f: 48 + f: 57 + f: 64 + f: 70 + f: 76 + f: 82 + f: 87 + f: 92 + f: 97 + f: 101 + f: 105 + f: 109 + f: 113 + f: 116 + f: 120 + f: 123 + f: 127 + f: 130 + f: 134 + f: 137 + f: 140 + f: 143 + f: 146 + f: 149 + f: 152 + f: 156 + f: 159 + f: 162 + f: 165 + f: 168 + f: 171 + f: 174 + f: 177 + f: 180 + f: 183 + f: 186 + f: 188 + f: 191 + f: 194 + f: 197 + f: 200 + f: 203 + f: 205 + f: 208 + f: 211 + f: 214 + f: 217 + f: 219 + f: 222 + f: 225 + f: 228 + f: 231 + f: 233 + f: 236 + f: 239 + f: 242 + f: 244 + f: 247 + f: 250 + f: 253 + f: 255 + f: 258 + f: 261 + f: 264 + f: 266 + f: 269 + f: 272 + f: 275 + f: 278 + f: 280 + f: 283 + f: 286 + f: 
289 + f: 292 + f: 294 + f: 297 + f: 300 + f: 303 + f: 305 + f: 308 + f: 311 + f: 314 + f: 317 + f: 319 + f: 322 + f: 325 + f: 327 + f: 330 + f: 333 + f: 336 + f: 339 + f: 341 + f: 344 + f: 347 + f: 350 + f: 353 + f: 355 + f: 358 + f: 361 + f: 364 + f: 367 + f: 370 + f: 373 + f: 375 + f: 378 + f: 381 + f: 384 + f: 387 + f: 390 + f: 393 + f: 396 + f: 399 + f: 401 + f: 404 + f: 407 + f: 410 + f: 413 + f: 416 + f: 419 + f: 422 + f: 425 + f: 428 + f: 431 + f: 434 + f: 437 + f: 440 + f: 443 + f: 446 + f: 449 + f: 452 + f: 455 + f: 458 + f: 461 + f: 464 + f: 467 + f: 470 + f: 473 + f: 476 + f: 479 + f: 483 + f: 486 + f: 489 + f: 492 + f: 495 + f: 498 + f: 501 + f: 504 + f: 507 + f: 511 + f: 514 + f: 517 + f: 520 + f: 523 + f: 526 + f: 530 + f: 533 + f: 536 + f: 539 + f: 542 + f: 545 + f: 549 + f: 552 + f: 555 + f: 558 + f: 562 + f: 565 + f: 569 + f: 572 + f: 575 + f: 579 + f: 582 + f: 585 + f: 589 + f: 592 + f: 595 + f: 599 + f: 602 + f: 605 + f: 609 + f: 612 + f: 616 + f: 620 + f: 623 + f: 626 + f: 630 + f: 634 + f: 637 + f: 641 + f: 644 + f: 648 + f: 651 + f: 655 + f: 658 + f: 662 + f: 665 + f: 669 + f: 672 + f: 676 + f: 680 + f: 683 + f: 687 + f: 691 + f: 694 + f: 698 + f: 702 + f: 705 + f: 709 + f: 712 + f: 716 + f: 720 + f: 724 + f: 727 + f: 731 + f: 735 + f: 739 + f: 742 + f: 746 + f: 750 + f: 754 + f: 758 + f: 761 + f: 765 + f: 769 + f: 773 + f: 777 + f: 780 + f: 784 + f: 788 + f: 792 + f: 796 + f: 800 + f: 804 + f: 808 + f: 812 + f: 816 + f: 820 + f: 823 + f: 828 + f: 832 + f: 836 + f: 840 + f: 844 + f: 848 + f: 852 + f: 856 + f: 860 + f: 864 + f: 868 + f: 873 + f: 877 + f: 881 + f: 885 + f: 889 + f: 893 + f: 897 + f: 902 + f: 906 + f: 910 + f: 914 + f: 919 + f: 923 + f: 927 + f: 931 + f: 935 + f: 940 + f: 944 + f: 948 + f: 953 + f: 957 + f: 962 + f: 966 + f: 970 + f: 975 + f: 979 + f: 984 + f: 988 + f: 993 + f: 997 + f: 1002 + f: 1006 + f: 1011 + f: 1015 + f: 1020 + f: 1024 + f: 1029 + f: 1034 + f: 1038 + f: 1043 + f: 1047 + f: 1052 + f: 1057 + f: 1062 + f: 1066 
+ f: 1071 + f: 1076 + f: 1081 + f: 1086 + f: 1090 + f: 1095 + f: 1100 + f: 1105 + f: 1110 + f: 1114 + f: 1119 + f: 1124 + f: 1129 + f: 1134 + f: 1139 + f: 1144 + f: 1149 + f: 1154 + f: 1159 + f: 1164 + f: 1169 + f: 1174 + f: 1179 + f: 1184 + f: 1189 + f: 1194 + f: 1199 + f: 1204 + f: 1209 + f: 1215 + f: 1220 + f: 1225 + f: 1230 + f: 1235 + f: 1241 + f: 1246 + f: 1251 + f: 1257 + f: 1262 + f: 1267 + f: 1273 + f: 1278 + f: 1284 + f: 1289 + f: 1294 + f: 1300 + f: 1305 + f: 1311 + f: 1316 + f: 1322 + f: 1327 + f: 1333 + f: 1338 + f: 1344 + f: 1350 + f: 1355 + f: 1361 + f: 1367 + f: 1372 + f: 1378 + f: 1383 + f: 1389 + f: 1395 + f: 1401 + f: 1407 + f: 1413 + f: 1418 + f: 1424 + f: 1430 + f: 1436 + f: 1442 + f: 1448 + f: 1454 + f: 1459 + f: 1465 + f: 1472 + f: 1477 + f: 1483 + f: 1489 + f: 1495 + f: 1501 + f: 1507 + f: 1514 + f: 1520 + f: 1526 + f: 1532 + f: 1538 + f: 1545 + f: 1551 + f: 1557 + f: 1564 + f: 1570 + f: 1576 + f: 1583 + f: 1589 + f: 1596 + f: 1602 + f: 1608 + f: 1615 + f: 1621 + f: 1628 + f: 1634 + f: 1641 + f: 1647 + f: 1654 + f: 1661 + f: 1667 + f: 1674 + f: 1681 + f: 1687 + f: 1694 + f: 1701 + f: 1708 + f: 1715 + f: 1722 + f: 1729 + f: 1735 + f: 1742 + f: 1749 + f: 1756 + f: 1763 + f: 1770 + f: 1777 + f: 1784 + f: 1791 + f: 1798 + f: 1806 + f: 1812 + f: 1820 + f: 1827 + f: 1835 + f: 1841 + f: 1849 + f: 1856 + f: 1863 + f: 1871 + f: 1878 + f: 1885 + f: 1893 + f: 1901 + f: 1908 + f: 1915 + f: 1923 + f: 1930 + f: 1938 + f: 1946 + f: 1953 + f: 1961 + f: 1969 + f: 1976 + f: 1984 + f: 1992 + f: 2000 + f: 2007 + f: 2015 + f: 2023 + f: 2031 + f: 2039 + f: 2047 + f: 2055 + f: 2063 + f: 2071 + f: 2079 + f: 2087 + f: 2095 + f: 2104 + f: 2112 + f: 2120 + f: 2128 + f: 2137 + f: 2146 + f: 2154 + f: 2162 + f: 2171 + f: 2179 + f: 2188 + f: 2197 + f: 2205 + f: 2214 + f: 2223 + f: 2232 + f: 2241 + f: 2250 + f: 2258 + f: 2268 + f: 2277 + f: 2285 + f: 2294 + f: 2304 + f: 2313 + f: 2322 + f: 2331 + f: 2340 + f: 2350 + f: 2359 + f: 2368 + f: 2378 + f: 2388 + f: 2397 + f: 2407 
+ f: 2416 + f: 2426 + f: 2436 + f: 2446 + f: 2455 + f: 2465 + f: 2475 + f: 2485 + f: 2495 + f: 2505 + f: 2515 + f: 2525 + f: 2535 + f: 2545 + f: 2556 + f: 2566 + f: 2577 + f: 2587 + f: 2598 + f: 2609 + f: 2620 + f: 2631 + f: 2641 + f: 2652 + f: 2663 + f: 2674 + f: 2685 + f: 2696 + f: 2708 + f: 2719 + f: 2730 + f: 2742 + f: 2753 + f: 2764 + f: 2776 + f: 2788 + f: 2799 + f: 2811 + f: 2823 + f: 2835 + f: 2847 + f: 2858 + f: 2870 + f: 2882 + f: 2894 + f: 2906 + f: 2919 + f: 2931 + f: 2943 + f: 2956 + f: 2968 + f: 2981 + f: 2994 + f: 3006 + f: 3019 + f: 3032 + f: 3045 + f: 3058 + f: 3070 + f: 3083 + f: 3096 + f: 3109 + f: 3121 + f: 3134 + f: 3148 + f: 3161 + f: 3174 + f: 3187 + f: 3200 + f: 3214 + f: 3228 + f: 3242 + f: 3255 + f: 3268 + f: 3283 + f: 3297 + f: 3310 + f: 3325 + f: 3340 + f: 3353 + f: 3368 + f: 3383 + f: 3398 + f: 3412 + f: 3427 + f: 3442 + f: 3457 + f: 3471 + f: 3487 + f: 3502 + f: 3516 + f: 3531 + f: 3546 + f: 3561 + f: 3577 + f: 3593 + f: 3608 + f: 3625 + f: 3641 + f: 3657 + f: 3673 + f: 3690 + f: 3706 + f: 3722 + f: 3738 + f: 3755 + f: 3772 + f: 3789 + f: 3805 + f: 3823 + f: 3839 + f: 3856 + f: 3873 + f: 3891 + f: 3908 + f: 3926 + f: 3944 + f: 3960 + f: 3977 + f: 3995 + f: 4013 + f: 4031 + f: 4048 + f: 4067 + f: 4085 + f: 4104 + f: 4122 + f: 4140 + f: 4159 + f: 4177 + f: 4196 + f: 4215 + f: 4234 + f: 4253 + f: 4272 + f: 4291 + f: 4311 + f: 4332 + f: 4351 + f: 4371 + f: 4391 + f: 4412 + f: 4433 + f: 4454 + f: 4474 + f: 4496 + f: 4518 + f: 4538 + f: 4558 + f: 4579 + f: 4601 + f: 4619 + f: 4640 + f: 4662 + f: 4684 + f: 4706 + f: 4728 + f: 4751 + f: 4771 + f: 4794 + f: 4818 + f: 4840 + f: 4863 + f: 4887 + f: 4910 + f: 4933 + f: 4956 + f: 4980 + f: 5004 + f: 5028 + f: 5052 + f: 5076 + f: 5100 + f: 5125 + f: 5152 + f: 5175 + f: 5200 + f: 5226 + f: 5251 + f: 5278 + f: 5304 + f: 5329 + f: 5354 + f: 5381 + f: 5407 + f: 5433 + f: 5460 + f: 5488 + f: 5516 + f: 5544 + f: 5573 + f: 5600 + f: 5628 + f: 5656 + f: 5684 + f: 5713 + f: 5741 + f: 5771 + f: 5799 + f: 5830 
+ f: 5860 + f: 5891 + f: 5921 + f: 5951 + f: 5980 + f: 6010 + f: 6041 + f: 6073 + f: 6105 + f: 6133 + f: 6163 + f: 6195 + f: 6227 + f: 6258 + f: 6291 + f: 6322 + f: 6356 + f: 6390 + f: 6424 + f: 6457 + f: 6491 + f: 6527 + f: 6561 + f: 6596 + f: 6631 + f: 6665 + f: 6701 + f: 6736 + f: 6771 + f: 6805 + f: 6840 + f: 6877 + f: 6911 + f: 6947 + f: 6985 + f: 7022 + f: 7059 + f: 7097 + f: 7135 + f: 7174 + f: 7212 + f: 7251 + f: 7289 + f: 7327 + f: 7366 + f: 7406 + f: 7447 + f: 7486 + f: 7525 + f: 7566 + f: 7606 + f: 7646 + f: 7688 + f: 7728 + f: 7771 + f: 7814 + f: 7859 + f: 7901 + f: 7949 + f: 7992 + f: 8036 + f: 8082 + f: 8127 + f: 8173 + f: 8218 + f: 8262 + f: 8309 + f: 8353 + f: 8397 + f: 8444 + f: 8489 + f: 8539 + f: 8585 + f: 8632 + f: 8682 + f: 8727 + f: 8777 + f: 8828 + f: 8879 + f: 8929 + f: 8982 + f: 9037 + f: 9087 + f: 9140 + f: 9193 + f: 9250 + f: 9305 + f: 9361 + f: 9418 + f: 9475 + f: 9532 + f: 9589 + f: 9644 + f: 9699 + f: 9758 + f: 9818 + f: 9875 + f: 9935 + f: 9997 + f: 10057 + f: 10117 + f: 10174 + f: 10232 + f: 10296 + f: 10356 + f: 10419 + f: 10482 + f: 10546 + f: 10608 + f: 10670 + f: 10729 + f: 10790 + f: 10855 + f: 10920 + f: 10990 + f: 11054 + f: 11118 + f: 11181 + f: 11248 + f: 11316 + f: 11385 + f: 11454 + f: 11526 + f: 11597 + f: 11667 + f: 11740 + f: 11820 + f: 11897 + f: 11973 + f: 12046 + f: 12126 + f: 12204 + f: 12287 + f: 12370 + f: 12456 + f: 12538 + f: 12627 + f: 12714 + f: 12799 + f: 12883 + f: 12971 + f: 13062 + f: 13154 + f: 13233 + f: 13328 + f: 13418 + f: 13511 + f: 13607 + f: 13709 + f: 13806 + f: 13903 + f: 14002 + f: 14104 + f: 14200 + f: 14288 + f: 14391 + f: 14488 + f: 14590 + f: 14698 + f: 14808 + f: 14910 + f: 15020 + f: 15126 + f: 15238 + f: 15347 + f: 15456 + f: 15574 + f: 15692 + f: 15786 + f: 15896 + f: 16016 + f: 16136 + f: 16250 + f: 16352 + f: 16474 + f: 16575 + f: 16702 + f: 16835 + f: 16965 + f: 17096 + f: 17232 + f: 17370 + f: 17443 + f: 17581 + f: 17719 + f: 17864 + f: 17976 + f: 18116 + f: 18250 + f: 18396 + f: 
18540 + f: 18690 + f: 18840 + f: 18989 + f: 19136 + f: 19294 + f: 19445 + f: 19589 + f: 19750 + f: 19905 + f: 20064 + f: 20191 + f: 20325 + f: 20497 + f: 20662 + f: 20833 + f: 20981 + f: 21152 + f: 21334 + f: 21510 + f: 21642 + f: 21821 + f: 22001 + f: 22186 + f: 22379 + f: 22568 + f: 22770 + f: 22958 + f: 23162 + f: 23360 + f: 23524 + f: 23737 + f: 23960 + f: 24175 + f: 24395 + f: 24631 + f: 24865 + f: 25091 + f: 25327 + f: 25580 + f: 25833 + f: 26089 + f: 26361 + f: 26636 + f: 26889 + f: 27155 + f: 27436 + f: 27715 + f: 28003 + f: 28303 + f: 28600 + f: 28916 + f: 29223 + f: 29553 + f: 29884 + f: 30200 + f: 30538 + f: 30868 + f: 31211 + f: 31548 + f: 31881 + f: 32253 + f: 32605 + f: 32980 + f: 33385 + f: 33805 + f: 34254 + f: 34723 + f: 35167 + f: 35666 + f: 36125 + f: 36652 + f: 37177 + f: 37739 + f: 38321 + f: 38932 + f: 39640 + f: 40337 + f: 41000 + f: 41626 + f: 42385 + f: 43122 + f: 43890 + f: 44687 + f: 45609 + f: 46520 + f: 47489 + f: 48432 + f: 49458 + f: 50511 + f: 51561 + f: 52568 + f: 53676 + f: 54936 + f: 56071 + f: 57302 + f: 58513 + f: 59800 + f: 61192 + f: 62702 + f: 64205 + f: 65868 + f: 67780 + f: 69960 + f: 72330 + f: 74918 + f: 77540 + f: 80344 + f: 83727 + f: 87662 + f: 93589 + f: 101441 + f: 110544 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_8/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr 
{ + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_8/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_8/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_8/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_8/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_8/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + 
original_node_names: "QNetwork/EncodingNetwork/lambda_8/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_8/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_8/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_8/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_8/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_8/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_8/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_12" + input: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_9/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_9/zeros_like" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: 
DT_FLOAT + tensor_shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + float_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_9/zeros_like" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_13" + input: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_10/expand_dims/ExpandDims:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 13 + f: 38 + f: 56 + f: 70 + f: 82 + f: 94 + f: 104 + f: 114 + f: 123 + f: 131 + f: 139 + f: 148 + f: 152 + f: 153 + f: 158 + f: 163 + f: 170 + f: 174 + f: 178 + f: 180 + f: 183 + f: 186 + f: 188 + f: 190 + f: 192 + f: 196 + f: 198 + f: 201 + f: 205 + f: 208 + f: 212 + f: 215 + f: 219 + f: 221 + f: 225 + f: 227 + f: 229 + f: 232 + f: 233 + f: 236 + f: 239 + f: 242 + f: 245 + f: 248 + f: 250 + f: 252 + f: 254 + f: 256 + f: 
259 + f: 261 + f: 264 + f: 267 + f: 270 + f: 272 + f: 275 + f: 278 + f: 280 + f: 283 + f: 285 + f: 287 + f: 290 + f: 293 + f: 295 + f: 297 + f: 300 + f: 303 + f: 305 + f: 308 + f: 311 + f: 313 + f: 316 + f: 319 + f: 322 + f: 325 + f: 329 + f: 331 + f: 333 + f: 336 + f: 338 + f: 340 + f: 343 + f: 345 + f: 347 + f: 347 + f: 349 + f: 351 + f: 353 + f: 355 + f: 357 + f: 359 + f: 361 + f: 363 + f: 365 + f: 368 + f: 369 + f: 371 + f: 373 + f: 375 + f: 377 + f: 380 + f: 382 + f: 385 + f: 387 + f: 389 + f: 391 + f: 394 + f: 396 + f: 398 + f: 400 + f: 403 + f: 405 + f: 408 + f: 410 + f: 412 + f: 415 + f: 417 + f: 420 + f: 422 + f: 425 + f: 427 + f: 429 + f: 432 + f: 434 + f: 437 + f: 439 + f: 442 + f: 444 + f: 446 + f: 449 + f: 451 + f: 454 + f: 456 + f: 458 + f: 461 + f: 463 + f: 466 + f: 469 + f: 472 + f: 474 + f: 476 + f: 479 + f: 482 + f: 483 + f: 486 + f: 489 + f: 492 + f: 495 + f: 498 + f: 500 + f: 503 + f: 505 + f: 508 + f: 510 + f: 513 + f: 516 + f: 519 + f: 522 + f: 524 + f: 528 + f: 530 + f: 533 + f: 536 + f: 539 + f: 541 + f: 544 + f: 547 + f: 550 + f: 553 + f: 556 + f: 559 + f: 561 + f: 563 + f: 567 + f: 570 + f: 572 + f: 575 + f: 577 + f: 580 + f: 584 + f: 586 + f: 589 + f: 592 + f: 595 + f: 598 + f: 601 + f: 605 + f: 607 + f: 611 + f: 613 + f: 617 + f: 620 + f: 623 + f: 626 + f: 629 + f: 632 + f: 635 + f: 639 + f: 642 + f: 645 + f: 648 + f: 651 + f: 654 + f: 657 + f: 660 + f: 662 + f: 666 + f: 669 + f: 672 + f: 676 + f: 679 + f: 682 + f: 685 + f: 688 + f: 690 + f: 693 + f: 696 + f: 699 + f: 702 + f: 705 + f: 709 + f: 712 + f: 714 + f: 718 + f: 721 + f: 724 + f: 726 + f: 728 + f: 729 + f: 731 + f: 734 + f: 737 + f: 741 + f: 745 + f: 748 + f: 750 + f: 753 + f: 756 + f: 760 + f: 763 + f: 766 + f: 770 + f: 773 + f: 776 + f: 779 + f: 782 + f: 786 + f: 788 + f: 793 + f: 796 + f: 798 + f: 802 + f: 805 + f: 808 + f: 811 + f: 815 + f: 818 + f: 820 + f: 824 + f: 827 + f: 829 + f: 832 + f: 835 + f: 838 + f: 842 + f: 846 + f: 849 + f: 854 + f: 857 + f: 860 + f: 864 + f: 
867 + f: 871 + f: 875 + f: 879 + f: 882 + f: 887 + f: 890 + f: 893 + f: 897 + f: 901 + f: 905 + f: 908 + f: 911 + f: 915 + f: 918 + f: 921 + f: 925 + f: 929 + f: 932 + f: 934 + f: 937 + f: 940 + f: 943 + f: 946 + f: 950 + f: 953 + f: 956 + f: 961 + f: 965 + f: 969 + f: 973 + f: 976 + f: 980 + f: 982 + f: 985 + f: 990 + f: 994 + f: 997 + f: 1001 + f: 1005 + f: 1007 + f: 1010 + f: 1014 + f: 1018 + f: 1022 + f: 1025 + f: 1028 + f: 1033 + f: 1035 + f: 1038 + f: 1042 + f: 1047 + f: 1052 + f: 1056 + f: 1060 + f: 1063 + f: 1067 + f: 1071 + f: 1075 + f: 1079 + f: 1083 + f: 1086 + f: 1088 + f: 1092 + f: 1097 + f: 1102 + f: 1106 + f: 1109 + f: 1113 + f: 1117 + f: 1120 + f: 1125 + f: 1129 + f: 1134 + f: 1137 + f: 1142 + f: 1146 + f: 1150 + f: 1151 + f: 1155 + f: 1159 + f: 1162 + f: 1166 + f: 1170 + f: 1174 + f: 1177 + f: 1181 + f: 1185 + f: 1188 + f: 1193 + f: 1196 + f: 1203 + f: 1207 + f: 1212 + f: 1214 + f: 1217 + f: 1220 + f: 1222 + f: 1222 + f: 1226 + f: 1229 + f: 1233 + f: 1237 + f: 1241 + f: 1246 + f: 1250 + f: 1253 + f: 1257 + f: 1262 + f: 1267 + f: 1272 + f: 1278 + f: 1283 + f: 1287 + f: 1293 + f: 1297 + f: 1301 + f: 1304 + f: 1309 + f: 1315 + f: 1320 + f: 1325 + f: 1329 + f: 1333 + f: 1336 + f: 1341 + f: 1344 + f: 1348 + f: 1351 + f: 1357 + f: 1363 + f: 1368 + f: 1374 + f: 1379 + f: 1383 + f: 1386 + f: 1391 + f: 1395 + f: 1399 + f: 1403 + f: 1407 + f: 1410 + f: 1415 + f: 1418 + f: 1423 + f: 1428 + f: 1432 + f: 1436 + f: 1438 + f: 1442 + f: 1446 + f: 1450 + f: 1454 + f: 1462 + f: 1467 + f: 1472 + f: 1477 + f: 1483 + f: 1488 + f: 1492 + f: 1496 + f: 1503 + f: 1508 + f: 1513 + f: 1518 + f: 1520 + f: 1526 + f: 1531 + f: 1534 + f: 1538 + f: 1542 + f: 1546 + f: 1552 + f: 1558 + f: 1564 + f: 1568 + f: 1573 + f: 1578 + f: 1581 + f: 1590 + f: 1596 + f: 1601 + f: 1606 + f: 1611 + f: 1616 + f: 1622 + f: 1629 + f: 1634 + f: 1640 + f: 1647 + f: 1651 + f: 1657 + f: 1660 + f: 1665 + f: 1672 + f: 1678 + f: 1686 + f: 1692 + f: 1698 + f: 1704 + f: 1709 + f: 1714 + f: 1719 + f: 1724 + 
f: 1730 + f: 1737 + f: 1744 + f: 1751 + f: 1755 + f: 1761 + f: 1764 + f: 1772 + f: 1778 + f: 1784 + f: 1789 + f: 1799 + f: 1804 + f: 1811 + f: 1819 + f: 1825 + f: 1830 + f: 1838 + f: 1849 + f: 1858 + f: 1862 + f: 1868 + f: 1872 + f: 1878 + f: 1885 + f: 1888 + f: 1892 + f: 1897 + f: 1902 + f: 1907 + f: 1919 + f: 1926 + f: 1932 + f: 1936 + f: 1941 + f: 1946 + f: 1952 + f: 1960 + f: 1968 + f: 1977 + f: 1985 + f: 1992 + f: 1997 + f: 2006 + f: 2012 + f: 2018 + f: 2026 + f: 2034 + f: 2044 + f: 2050 + f: 2057 + f: 2064 + f: 2069 + f: 2075 + f: 2082 + f: 2091 + f: 2098 + f: 2107 + f: 2122 + f: 2126 + f: 2135 + f: 2146 + f: 2149 + f: 2157 + f: 2163 + f: 2172 + f: 2178 + f: 2184 + f: 2191 + f: 2198 + f: 2208 + f: 2216 + f: 2223 + f: 2235 + f: 2242 + f: 2252 + f: 2263 + f: 2272 + f: 2277 + f: 2288 + f: 2296 + f: 2306 + f: 2311 + f: 2318 + f: 2323 + f: 2334 + f: 2341 + f: 2356 + f: 2366 + f: 2373 + f: 2379 + f: 2386 + f: 2407 + f: 2416 + f: 2423 + f: 2432 + f: 2438 + f: 2448 + f: 2453 + f: 2464 + f: 2473 + f: 2473 + f: 2481 + f: 2492 + f: 2504 + f: 2511 + f: 2523 + f: 2529 + f: 2537 + f: 2545 + f: 2556 + f: 2566 + f: 2575 + f: 2584 + f: 2592 + f: 2602 + f: 2613 + f: 2624 + f: 2636 + f: 2643 + f: 2647 + f: 2652 + f: 2664 + f: 2675 + f: 2688 + f: 2693 + f: 2702 + f: 2709 + f: 2722 + f: 2739 + f: 2754 + f: 2766 + f: 2776 + f: 2786 + f: 2799 + f: 2810 + f: 2832 + f: 2840 + f: 2849 + f: 2860 + f: 2873 + f: 2889 + f: 2908 + f: 2914 + f: 2926 + f: 2939 + f: 2950 + f: 2961 + f: 2969 + f: 2978 + f: 2990 + f: 2999 + f: 3023 + f: 3032 + f: 3049 + f: 3066 + f: 3085 + f: 3101 + f: 3107 + f: 3117 + f: 3129 + f: 3144 + f: 3167 + f: 3190 + f: 3212 + f: 3229 + f: 3238 + f: 3264 + f: 3293 + f: 3302 + f: 3309 + f: 3314 + f: 3323 + f: 3344 + f: 3352 + f: 3362 + f: 3390 + f: 3400 + f: 3411 + f: 3435 + f: 3456 + f: 3470 + f: 3485 + f: 3498 + f: 3505 + f: 3519 + f: 3539 + f: 3545 + f: 3545 + f: 3560 + f: 3576 + f: 3597 + f: 3607 + f: 3621 + f: 3641 + f: 3665 + f: 3679 + f: 3701 + f: 3714 + f: 3733 + 
f: 3741 + f: 3745 + f: 3757 + f: 3773 + f: 3787 + f: 3795 + f: 3805 + f: 3822 + f: 3835 + f: 3844 + f: 3861 + f: 3872 + f: 3878 + f: 3897 + f: 3919 + f: 3941 + f: 3971 + f: 4004 + f: 4014 + f: 4019 + f: 4061 + f: 4068 + f: 4089 + f: 4108 + f: 4117 + f: 4125 + f: 4146 + f: 4165 + f: 4194 + f: 4204 + f: 4224 + f: 4236 + f: 4263 + f: 4290 + f: 4301 + f: 4319 + f: 4326 + f: 4347 + f: 4369 + f: 4386 + f: 4413 + f: 4435 + f: 4451 + f: 4451 + f: 4451 + f: 4476 + f: 4500 + f: 4539 + f: 4579 + f: 4592 + f: 4600 + f: 4622 + f: 4650 + f: 4683 + f: 4714 + f: 4742 + f: 4755 + f: 4771 + f: 4788 + f: 4816 + f: 4828 + f: 4831 + f: 4831 + f: 4831 + f: 4843 + f: 4852 + f: 4865 + f: 4896 + f: 4915 + f: 4931 + f: 4952 + f: 4965 + f: 4983 + f: 5007 + f: 5043 + f: 5061 + f: 5081 + f: 5095 + f: 5122 + f: 5143 + f: 5171 + f: 5204 + f: 5226 + f: 5233 + f: 5250 + f: 5281 + f: 5320 + f: 5323 + f: 5328 + f: 5345 + f: 5374 + f: 5413 + f: 5466 + f: 5492 + f: 5524 + f: 5555 + f: 5567 + f: 5610 + f: 5676 + f: 5701 + f: 5716 + f: 5744 + f: 5768 + f: 5795 + f: 5818 + f: 5854 + f: 5906 + f: 5934 + f: 5960 + f: 5975 + f: 5993 + f: 6025 + f: 6034 + f: 6051 + f: 6082 + f: 6106 + f: 6125 + f: 6159 + f: 6187 + f: 6242 + f: 6287 + f: 6311 + f: 6332 + f: 6348 + f: 6358 + f: 6368 + f: 6377 + f: 6402 + f: 6407 + f: 6428 + f: 6450 + f: 6475 + f: 6498 + f: 6505 + f: 6533 + f: 6565 + f: 6580 + f: 6595 + f: 6611 + f: 6654 + f: 6658 + f: 6705 + f: 6751 + f: 6786 + f: 6828 + f: 6876 + f: 6896 + f: 6948 + f: 6964 + f: 7065 + f: 7082 + f: 7118 + f: 7184 + f: 7214 + f: 7271 + f: 7310 + f: 7357 + f: 7405 + f: 7506 + f: 7613 + f: 7641 + f: 7675 + f: 7720 + f: 7781 + f: 7833 + f: 7860 + f: 7898 + f: 7929 + f: 8044 + f: 8104 + f: 8148 + f: 8236 + f: 8273 + f: 8313 + f: 8349 + f: 8381 + f: 8409 + f: 8498 + f: 8507 + f: 8524 + f: 8570 + f: 8607 + f: 8630 + f: 8637 + f: 8675 + f: 8700 + f: 8714 + f: 8734 + f: 8776 + f: 8836 + f: 8854 + f: 8867 + f: 8868 + f: 9065 + f: 9113 + f: 9121 + f: 9241 + f: 9357 + f: 9360 + f: 9585 + 
f: 9613 + f: 9684 + f: 9727 + f: 9751 + f: 9777 + f: 9802 + f: 9889 + f: 9903 + f: 9914 + f: 9978 + f: 10061 + f: 10192 + f: 10213 + f: 10345 + f: 10369 + f: 10404 + f: 10430 + f: 10471 + f: 10481 + f: 10489 + f: 10492 + f: 10494 + f: 10524 + f: 10554 + f: 10557 + f: 10560 + f: 10562 + f: 10641 + f: 10716 + f: 10842 + f: 10897 + f: 10967 + f: 11053 + f: 11128 + f: 11137 + f: 11328 + f: 11336 + f: 11401 + f: 11532 + f: 11573 + f: 11860 + f: 11880 + f: 12013 + f: 12305 + f: 12358 + f: 12386 + f: 12404 + f: 12456 + f: 12456 + f: 12476 + f: 12615 + f: 12677 + f: 12981 + f: 13094 + f: 13197 + f: 13708 + f: 13717 + f: 13788 + f: 14049 + f: 14112 + f: 14224 + f: 14257 + f: 14681 + f: 14901 + f: 15006 + f: 15071 + f: 15100 + f: 15248 + f: 15669 + f: 15877 + f: 15953 + f: 15953 + f: 16066 + f: 16072 + f: 16271 + f: 16292 + f: 16386 + f: 16490 + f: 16633 + f: 16670 + f: 16834 + f: 16896 + f: 17543 + f: 17693 + f: 17800 + f: 17859 + f: 18397 + f: 18811 + f: 18826 + f: 18971 + f: 19304 + f: 19319 + f: 19695 + f: 20378 + f: 20865 + f: 21313 + f: 21330 + f: 22321 + f: 22760 + f: 22770 + f: 23783 + f: 23785 + f: 24525 + f: 24844 + f: 24848 + f: 24964 + f: 24966 + f: 27468 + f: 27478 + f: 27555 + f: 27555 + f: 28215 + f: 28219 + f: 28336 + f: 28490 + f: 30213 + f: 30228 + f: 30242 + f: 34116 + f: 43518 + f: 43518 + f: 43518 + f: 43852 + f: 43852 + f: 43852 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_10/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/Cast" + } + } + node_def { + name: 
"QNetwork/EncodingNetwork/lambda_10/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_10/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_10/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_10/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_10/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_10/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } 
+ attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_10/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_10/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_10/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_10/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_10/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_10/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims/dim" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims/dim" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims" + op: "ExpandDims" + input: "time_step_14" + input: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims/dim:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/Bucketize" + op: "Bucketize" + input: "QNetwork/EncodingNetwork/lambda_11/expand_dims/ExpandDims:output:0" + 
attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "boundaries" + value { + list { + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 
0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + 
f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 
+ f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 0 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 1 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 2 + f: 3 + f: 4 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/Bucketize" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/Cast" + op: "Cast" + input: "QNetwork/EncodingNetwork/lambda_11/Bucketize:output:0" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/Cast" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/truediv/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" 
+ value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + } + float_val: 999 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/truediv/y" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/truediv" + op: "RealDiv" + input: "QNetwork/EncodingNetwork/lambda_11/Cast:y:0" + input: "QNetwork/EncodingNetwork/lambda_11/truediv/y:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/truediv" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/Sqrt" + op: "Sqrt" + input: "QNetwork/EncodingNetwork/lambda_11/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/Sqrt" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/mul" + op: "Mul" + input: "QNetwork/EncodingNetwork/lambda_11/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_11/truediv:z:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/mul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: 
"QNetwork/EncodingNetwork/lambda_11/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/lambda_11/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda_11/truediv:z:0" + input: "QNetwork/EncodingNetwork/lambda_11/Sqrt:y:0" + input: "QNetwork/EncodingNetwork/lambda_11/mul:z:0" + input: "QNetwork/EncodingNetwork/lambda_11/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 3 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/lambda_11/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/concatenate/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/concatenate/concat/axis" + } + } + node_def { + name: "QNetwork/EncodingNetwork/concatenate/concat" + op: "ConcatV2" + input: "QNetwork/EncodingNetwork/lambda/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_1/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_2/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_3/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_4/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_5/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_6/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_7/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_8/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_9/zeros_like:output:0" + input: "QNetwork/EncodingNetwork/lambda_10/concat:output:0" + input: "QNetwork/EncodingNetwork/lambda_11/concat:output:0" + input: 
"QNetwork/EncodingNetwork/concatenate/concat/axis:output:0" + attr { + key: "N" + value { + i: 12 + } + } + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 34 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/concatenate/concat" + } + } + node_def { + name: "QNetwork/EncodingNetwork/flatten/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\377\377\377\377\"\000\000\000" + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/flatten/Const" + } + } + node_def { + name: "QNetwork/EncodingNetwork/flatten/Reshape" + op: "Reshape" + input: "QNetwork/EncodingNetwork/concatenate/concat:output:0" + input: "QNetwork/EncodingNetwork/flatten/Const:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 34 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/flatten/Reshape" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_encodingnetwork_dense_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/MatMul/ReadVariableOp" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/MatMul" + op: "MatMul" + input: "QNetwork/EncodingNetwork/flatten/Reshape:output:0" + input: 
"QNetwork/EncodingNetwork/dense/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/MatMul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_encodingnetwork_dense_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/BiasAdd" + op: "BiasAdd" + input: "QNetwork/EncodingNetwork/dense/MatMul:product:0" + input: "QNetwork/EncodingNetwork/dense/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/BiasAdd" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense/Relu" + op: "Relu" + input: "QNetwork/EncodingNetwork/dense/BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense/Relu" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_encodingnetwork_dense_1_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + } + } + } + attr { + 
key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/MatMul/ReadVariableOp" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/MatMul" + op: "MatMul" + input: "QNetwork/EncodingNetwork/dense/Relu:activations:0" + input: "QNetwork/EncodingNetwork/dense_1/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 40 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/MatMul" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_encodingnetwork_dense_1_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/BiasAdd" + op: "BiasAdd" + input: "QNetwork/EncodingNetwork/dense_1/MatMul:product:0" + input: "QNetwork/EncodingNetwork/dense_1/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 40 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/EncodingNetwork/dense_1/BiasAdd" + } + } + node_def { + name: "QNetwork/EncodingNetwork/dense_1/Relu" + op: "Relu" + input: "QNetwork/EncodingNetwork/dense_1/BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 40 + } + } + } + } + } + experimental_debug_info { + original_node_names: 
"QNetwork/EncodingNetwork/dense_1/Relu" + } + } + node_def { + name: "QNetwork/dense_2/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_dense_2_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/dense_2/MatMul/ReadVariableOp" + } + } + node_def { + name: "QNetwork/dense_2/MatMul" + op: "MatMul" + input: "QNetwork/EncodingNetwork/dense_1/Relu:activations:0" + input: "QNetwork/dense_2/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 2 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/dense_2/MatMul" + } + } + node_def { + name: "QNetwork/dense_2/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "qnetwork_dense_2_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "QNetwork/dense_2/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "QNetwork/dense_2/BiasAdd" + op: "BiasAdd" + input: "QNetwork/dense_2/MatMul:product:0" + input: "QNetwork/dense_2/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 2 + } + } + } + } + } + experimental_debug_info { + original_node_names: "QNetwork/dense_2/BiasAdd" + } + } + node_def { + name: "ShiftedCategorical_1/mode/ArgMax/dimension" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + 
value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: -1 + } + } + } + experimental_debug_info { + original_node_names: "ShiftedCategorical_1/mode/ArgMax/dimension" + } + } + node_def { + name: "ShiftedCategorical_1/mode/ArgMax" + op: "ArgMax" + input: "QNetwork/dense_2/BiasAdd:output:0" + input: "ShiftedCategorical_1/mode/ArgMax/dimension:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "ShiftedCategorical_1/mode/ArgMax" + } + } + node_def { + name: "add/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "add/y" + } + } + node_def { + name: "add" + op: "AddV2" + input: "ShiftedCategorical_1/mode/ArgMax:output:0" + input: "add/y:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "add" + } + } + node_def { + name: "Deterministic/atol" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic/atol" + } + } + node_def { + name: "Deterministic/rtol" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + 
} + experimental_debug_info { + original_node_names: "Deterministic/rtol" + } + } + node_def { + name: "Deterministic_1/sample/sample_shape/x" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_FLOAT + tensor_shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/sample_shape/x" + } + } + node_def { + name: "Deterministic_1/sample/sample_shape" + op: "Cast" + input: "Deterministic_1/sample/sample_shape/x:output:0" + attr { + key: "DstT" + value { + type: DT_INT32 + } + } + attr { + key: "SrcT" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/sample_shape" + } + } + node_def { + name: "Deterministic_1/sample/Shape" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Shape" + } + } + node_def { + name: "Deterministic_1/sample/Shape_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Shape_1" + } + } + node_def { + name: "Deterministic_1/sample/Shape_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value 
{ + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Shape_2" + } + } + node_def { + name: "Deterministic_1/sample/BroadcastArgs" + op: "BroadcastArgs" + input: "Deterministic_1/sample/Shape_1:output:0" + input: "Deterministic_1/sample/Shape_2:output:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/BroadcastArgs" + } + } + node_def { + name: "Deterministic_1/sample/BroadcastArgs_1" + op: "BroadcastArgs" + input: "Deterministic_1/sample/Shape:output:0" + input: "Deterministic_1/sample/BroadcastArgs:r0:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/BroadcastArgs_1" + } + } + node_def { + name: "Deterministic_1/sample/Const" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Const" + } + } + node_def { + name: "Deterministic_1/sample/concat/values_0" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat/values_0" + } + } + node_def { + name: "Deterministic_1/sample/concat/axis" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + 
} + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat/axis" + } + } + node_def { + name: "Deterministic_1/sample/concat" + op: "ConcatV2" + input: "Deterministic_1/sample/concat/values_0:output:0" + input: "Deterministic_1/sample/BroadcastArgs_1:r0:0" + input: "Deterministic_1/sample/Const:output:0" + input: "Deterministic_1/sample/concat/axis:output:0" + attr { + key: "N" + value { + i: 3 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat" + } + } + node_def { + name: "Deterministic_1/sample/BroadcastTo" + op: "BroadcastTo" + input: "add:z:0" + input: "Deterministic_1/sample/concat:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/BroadcastTo" + } + } + node_def { + name: "Deterministic_1/sample/Shape_3" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 2 + } + } + tensor_content: "\001\000\000\000\001\000\000\000" + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Shape_3" + } + } + node_def { + name: "Deterministic_1/sample/strided_slice/stack" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + 
} + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/strided_slice/stack" + } + } + node_def { + name: "Deterministic_1/sample/strided_slice/stack_1" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/strided_slice/stack_1" + } + } + node_def { + name: "Deterministic_1/sample/strided_slice/stack_2" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/strided_slice/stack_2" + } + } + node_def { + name: "Deterministic_1/sample/strided_slice" + op: "StridedSlice" + input: "Deterministic_1/sample/Shape_3:output:0" + input: "Deterministic_1/sample/strided_slice/stack:output:0" + input: "Deterministic_1/sample/strided_slice/stack_1:output:0" + input: "Deterministic_1/sample/strided_slice/stack_2:output:0" + attr { + key: "Index" + value { + type: DT_INT32 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "end_mask" + value { + i: 1 + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/strided_slice" + } + } + node_def { + name: "Deterministic_1/sample/concat_1/axis" + op: "Const" + attr { + key: "_output_shapes" + 
value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat_1/axis" + } + } + node_def { + name: "Deterministic_1/sample/concat_1" + op: "ConcatV2" + input: "Deterministic_1/sample/sample_shape:y:0" + input: "Deterministic_1/sample/strided_slice:output:0" + input: "Deterministic_1/sample/concat_1/axis:output:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/concat_1" + } + } + node_def { + name: "Deterministic_1/sample/Reshape" + op: "Reshape" + input: "Deterministic_1/sample/BroadcastTo:output:0" + input: "Deterministic_1/sample/concat_1:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Deterministic_1/sample/Reshape" + } + } + node_def { + name: "clip_by_value/Minimum/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "clip_by_value/Minimum/y" + } + } + node_def { + name: "clip_by_value/Minimum" + op: "Minimum" + input: "Deterministic_1/sample/Reshape:output:0" + input: "clip_by_value/Minimum/y:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + 
original_node_names: "clip_by_value/Minimum" + } + } + node_def { + name: "clip_by_value/y" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT64 + tensor_shape { + } + int64_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "clip_by_value/y" + } + } + node_def { + name: "clip_by_value" + op: "Maximum" + input: "clip_by_value/Minimum:z:0" + input: "clip_by_value/y:output:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "clip_by_value" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "clip_by_value:z:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + 
size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_signature_wrapper_4619033" + } + node_def { + 
name: "PartitionedCall" + op: "PartitionedCall" + attr { + key: "Tin" + value { + list { + } + } + } + attr { + key: "Tout" + value { + list { + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_function_with_signature_4619029" + } + } + } + experimental_debug_info { + original_node_names: "PartitionedCall" + } + } + attr { + key: "_input_shapes" + value { + } + } + } + function { + signature { + name: "__inference__traced_save_4619143" + input_arg { + name: "file_prefix" + type: DT_STRING + } + input_arg { + name: "savev2_train_step_read_readvariableop" + type: DT_INT64 + } + input_arg { + name: "savev2_qnetwork_encodingnetwork_dense_kernel_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_encodingnetwork_dense_bias_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_encodingnetwork_dense_1_kernel_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_encodingnetwork_dense_1_bias_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_dense_2_kernel_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_qnetwork_dense_2_bias_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_1_const" + type: DT_STRING + } + output_arg { + name: "identity_1" + type: DT_STRING + } + is_stateful: true + control_output: "MergeV2Checkpoints" + control_output: "SaveV2" + control_output: "SaveV2_1" + } + node_def { + name: "StaticRegexFullMatch" + op: "StaticRegexFullMatch" + input: "file_prefix" + device: "/device:CPU:*" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } 
+ } + } + attr { + key: "pattern" + value { + s: "^s3://.*" + } + } + experimental_debug_info { + original_node_names: "StaticRegexFullMatch" + } + } + node_def { + name: "Const" + op: "Const" + device: "/device:CPU:*" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: ".part" + } + } + } + experimental_debug_info { + original_node_names: "Const" + } + } + node_def { + name: "Const_1" + op: "Const" + device: "/device:CPU:*" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "_temp_f4c8d2e64931472295be68a11e57e937/part" + } + } + } + experimental_debug_info { + original_node_names: "Const_1" + } + } + node_def { + name: "Select" + op: "Select" + input: "StaticRegexFullMatch:output:0" + input: "Const:output:0" + input: "Const_1:output:0" + device: "/device:CPU:*" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Select" + } + } + node_def { + name: "StringJoin" + op: "StringJoin" + input: "file_prefix" + input: "Select:output:0" + device: "/device:CPU:*" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "StringJoin" + } + } + node_def { + name: "num_shards" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } + experimental_debug_info { + 
original_node_names: "num_shards" + } + } + node_def { + name: "ShardedFilename/shard" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename/shard" + } + } + node_def { + name: "ShardedFilename" + op: "ShardedFilename" + input: "StringJoin:output:0" + input: "ShardedFilename/shard:output:0" + input: "num_shards:output:0" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename" + } + } + node_def { + name: "SaveV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 7 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 7 + } + } + string_val: "train_step/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/0/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/1/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/2/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/3/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/4/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/5/.ATTRIBUTES/VARIABLE_VALUE" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2/tensor_names" + } + } + node_def { + name: "SaveV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 7 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + 
dim { + size: 7 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2/shape_and_slices" + } + } + node_def { + name: "SaveV2" + op: "SaveV2" + input: "ShardedFilename:filename:0" + input: "SaveV2/tensor_names:output:0" + input: "SaveV2/shape_and_slices:output:0" + input: "savev2_train_step_read_readvariableop" + input: "savev2_qnetwork_encodingnetwork_dense_kernel_read_readvariableop" + input: "savev2_qnetwork_encodingnetwork_dense_bias_read_readvariableop" + input: "savev2_qnetwork_encodingnetwork_dense_1_kernel_read_readvariableop" + input: "savev2_qnetwork_encodingnetwork_dense_1_bias_read_readvariableop" + input: "savev2_qnetwork_dense_2_kernel_read_readvariableop" + input: "savev2_qnetwork_dense_2_bias_read_readvariableop" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_INT64 + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + } + } + } + experimental_debug_info { + original_node_names: "SaveV2" + } + } + node_def { + name: "ShardedFilename_1/shard" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename_1/shard" + } + } + node_def { + name: "ShardedFilename_1" + op: "ShardedFilename" + input: "StringJoin:output:0" + input: "ShardedFilename_1/shard:output:0" + input: "num_shards:output:0" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename_1" + } + } + node_def { 
+ name: "SaveV2_1/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "_CHECKPOINTABLE_OBJECT_GRAPH" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2_1/tensor_names" + } + } + node_def { + name: "SaveV2_1/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2_1/shape_and_slices" + } + } + node_def { + name: "SaveV2_1" + op: "SaveV2" + input: "ShardedFilename_1:filename:0" + input: "SaveV2_1/tensor_names:output:0" + input: "SaveV2_1/shape_and_slices:output:0" + input: "savev2_1_const" + input: "^SaveV2" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_STRING + } + } + } + experimental_debug_info { + original_node_names: "SaveV2_1" + } + } + node_def { + name: "MergeV2Checkpoints/checkpoint_prefixes" + op: "Pack" + input: "ShardedFilename:filename:0" + input: "ShardedFilename_1:filename:0" + input: "^SaveV2" + input: "^SaveV2_1" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + experimental_debug_info { + original_node_names: "MergeV2Checkpoints/checkpoint_prefixes" + } + } + node_def { + name: "MergeV2Checkpoints" + op: "MergeV2Checkpoints" + 
input: "MergeV2Checkpoints/checkpoint_prefixes:output:0" + input: "file_prefix" + input: "^SaveV2_1" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + experimental_debug_info { + original_node_names: "MergeV2Checkpoints" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "file_prefix" + input: "^MergeV2Checkpoints" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + node_def { + name: "Identity_1" + op: "Identity" + input: "Identity:output:0" + input: "^MergeV2Checkpoints" + input: "^SaveV2" + input: "^SaveV2_1" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_1" + } + } + ret { + key: "identity_1" + value: "Identity_1:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + } + shape { + } + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + shape { + dim { + size: 100 + } + } + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + shape { + dim { + size: 40 + } + } + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + shape { + dim { + size: 2 + } + } + shape { + } + } + } + } + control_ret { + key: "MergeV2Checkpoints" + value: "MergeV2Checkpoints" + } + control_ret { + key: "SaveV2" + value: "SaveV2" + } + control_ret { + key: "SaveV2_1" + value: "SaveV2_1" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "file_prefix" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + 
list { + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + } + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + } + } + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 40 + } + dim { + size: 2 + } + } + } + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_function_722" + } + attr { + key: "_input_shapes" + value { + } + } + } + function { + signature { + name: "__inference_signature_wrapper_4619026" + input_arg { + name: "callee_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "callee_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "callee_users" + type: DT_INT64 + } + input_arg { + name: "caller_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "caller_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "caller_users" + type: DT_INT64 + } + input_arg { + name: "callsite_height" + type: DT_INT64 + } + input_arg { + name: "cost_estimate" + type: DT_INT64 + } + input_arg { + name: "discount" + type: DT_FLOAT + } + input_arg { + name: "edge_count" + type: DT_INT64 + } + input_arg { + name: "inlining_default" + type: DT_INT64 + } + input_arg { + name: "node_count" + type: DT_INT64 + } + input_arg { + name: "nr_ctant_params" + type: DT_INT64 + } + input_arg { 
+ name: "reward" + type: DT_FLOAT + } + input_arg { + name: "step_type" + type: DT_INT32 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "step_type" + input: "reward" + input: "discount" + input: "callee_basic_block_count" + input: "callee_conditionally_executed_blocks" + input: "callee_users" + input: "caller_basic_block_count" + input: "caller_conditionally_executed_blocks" + input: "caller_users" + input: "callsite_height" + input: "cost_estimate" + input: "edge_count" + input: "inlining_default" + input: "node_count" + input: "nr_ctant_params" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + 
key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_function_with_signature_4618993" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_basic_block_count" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: 
"_user_specified_name" + value { + s: "callee_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_users" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_basic_block_count" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_users" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callsite_height" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "cost_estimate" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "discount" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "edge_count" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + 
attr { + key: "_user_specified_name" + value { + s: "inlining_default" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "node_count" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "nr_ctant_params" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "reward" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "step_type" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_function_with_signature_4618993" + input_arg { + name: "step_type" + type: DT_INT32 + } + input_arg { + name: "reward" + type: DT_FLOAT + } + input_arg { + name: "discount" + type: DT_FLOAT + } + input_arg { + name: "callee_basic_block_count" + type: DT_INT64 + } + input_arg { + 
name: "callee_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "callee_users" + type: DT_INT64 + } + input_arg { + name: "caller_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "caller_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "caller_users" + type: DT_INT64 + } + input_arg { + name: "callsite_height" + type: DT_INT64 + } + input_arg { + name: "cost_estimate" + type: DT_INT64 + } + input_arg { + name: "edge_count" + type: DT_INT64 + } + input_arg { + name: "inlining_default" + type: DT_INT64 + } + input_arg { + name: "node_count" + type: DT_INT64 + } + input_arg { + name: "nr_ctant_params" + type: DT_INT64 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "step_type" + input: "reward" + input: "discount" + input: "callee_basic_block_count" + input: "callee_conditionally_executed_blocks" + input: "callee_users" + input: "caller_basic_block_count" + input: "caller_conditionally_executed_blocks" + input: "caller_users" + input: "callsite_height" + input: "cost_estimate" + input: "edge_count" + input: "inlining_default" + input: "node_count" + input: "nr_ctant_params" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + 
type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_polymorphic_action_fn_4618978" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true 
+ } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "step_type" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "reward" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "discount" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_basic_block_count" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_users" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_basic_block_count" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_conditionally_executed_blocks" + } + } + } 
+ } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_users" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callsite_height" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "cost_estimate" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "edge_count" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inlining_default" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "node_count" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "nr_ctant_params" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } 
+ } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_polymorphic_action_fn_4619080" + input_arg { + name: "time_step_step_type" + type: DT_INT32 + } + input_arg { + name: "time_step_reward" + type: DT_FLOAT + } + input_arg { + name: "time_step_discount" + type: DT_FLOAT + } + input_arg { + name: "time_step_observation_callee_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_callee_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_callee_users" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_caller_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_caller_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_caller_users" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_callsite_height" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_cost_estimate" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_edge_count" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_inlining_default" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_node_count" + type: DT_INT64 + } + input_arg { + name: "time_step_observation_nr_ctant_params" + type: DT_INT64 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + 
control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "time_step_step_type" + input: "time_step_reward" + input: "time_step_discount" + input: "time_step_observation_callee_basic_block_count" + input: "time_step_observation_callee_conditionally_executed_blocks" + input: "time_step_observation_callee_users" + input: "time_step_observation_caller_basic_block_count" + input: "time_step_observation_caller_conditionally_executed_blocks" + input: "time_step_observation_caller_users" + input: "time_step_observation_callsite_height" + input: "time_step_observation_cost_estimate" + input: "time_step_observation_edge_count" + input: "time_step_observation_inlining_default" + input: "time_step_observation_node_count" + input: "time_step_observation_nr_ctant_params" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_action_931" + } + } + } + experimental_debug_info 
{ + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/step_type" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/reward" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { 
+ s: "time_step/discount" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/callee_basic_block_count" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/callee_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/callee_users" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/caller_basic_block_count" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/caller_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/caller_users" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/callsite_height" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: 
"time_step/observation/cost_estimate" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/edge_count" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/inlining_default" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/node_count" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step/observation/nr_ctant_params" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_function_with_signature_4619040" + input_arg { + name: "unknown" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + 
node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "unknown" + attr { + key: "Tin" + value { + list { + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 0 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference__728" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_polymorphic_action_fn_4618978" + input_arg { + name: "time_step" + type: DT_INT32 + } + input_arg { + name: "time_step_1" + type: DT_FLOAT + } + input_arg { + name: "time_step_2" + type: DT_FLOAT + } + input_arg { + name: "time_step_3" + type: DT_INT64 + } + input_arg { + name: "time_step_4" + type: DT_INT64 + } + input_arg { + name: "time_step_5" + type: DT_INT64 + } + input_arg { + name: "time_step_6" + type: DT_INT64 + } + input_arg { + name: "time_step_7" + type: DT_INT64 + } + 
input_arg { + name: "time_step_8" + type: DT_INT64 + } + input_arg { + name: "time_step_9" + type: DT_INT64 + } + input_arg { + name: "time_step_10" + type: DT_INT64 + } + input_arg { + name: "time_step_11" + type: DT_INT64 + } + input_arg { + name: "time_step_12" + type: DT_INT64 + } + input_arg { + name: "time_step_13" + type: DT_INT64 + } + input_arg { + name: "time_step_14" + type: DT_INT64 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "time_step" + input: "time_step_1" + input: "time_step_2" + input: "time_step_3" + input: "time_step_4" + input: "time_step_5" + input: "time_step_6" + input: "time_step_7" + input: "time_step_8" + input: "time_step_9" + input: "time_step_10" + input: "time_step_11" + input: "time_step_12" + input: "time_step_13" + input: "time_step_14" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + 
attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_action_931" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" 
+ value { + s: "time_step" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 10 + value 
{ + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "time_step" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_polymorphic_action_fn_946" + input_arg { + name: "step_type" + type: DT_INT32 + } + input_arg { + name: "reward" + type: DT_FLOAT + } + input_arg { + name: "discount" + type: DT_FLOAT + } + 
input_arg { + name: "callee_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "callee_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "callee_users" + type: DT_INT64 + } + input_arg { + name: "caller_basic_block_count" + type: DT_INT64 + } + input_arg { + name: "caller_conditionally_executed_blocks" + type: DT_INT64 + } + input_arg { + name: "caller_users" + type: DT_INT64 + } + input_arg { + name: "callsite_height" + type: DT_INT64 + } + input_arg { + name: "cost_estimate" + type: DT_INT64 + } + input_arg { + name: "edge_count" + type: DT_INT64 + } + input_arg { + name: "inlining_default" + type: DT_INT64 + } + input_arg { + name: "node_count" + type: DT_INT64 + } + input_arg { + name: "nr_ctant_params" + type: DT_INT64 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + input_arg { + name: "unknown_3" + type: DT_RESOURCE + } + input_arg { + name: "unknown_4" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "step_type" + input: "reward" + input: "discount" + input: "callee_basic_block_count" + input: "callee_conditionally_executed_blocks" + input: "callee_users" + input: "caller_basic_block_count" + input: "caller_conditionally_executed_blocks" + input: "caller_users" + input: "callsite_height" + input: "cost_estimate" + input: "edge_count" + input: "inlining_default" + input: "node_count" + input: "nr_ctant_params" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + input: "unknown_3" + input: "unknown_4" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_FLOAT + type: DT_FLOAT + type: DT_INT64 + type: DT_INT64 + 
type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_INT64 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_INT64 + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 15 + i: 16 + i: 17 + i: 18 + i: 19 + i: 20 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0012\005*\0010J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_action_931" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + unknown_rank: true + } 
+ shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "step_type" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "reward" + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "discount" + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_basic_block_count" + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callee_users" + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_basic_block_count" + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" 
+ value { + s: "caller_conditionally_executed_blocks" + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "caller_users" + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "callsite_height" + } + } + } + } + arg_attr { + key: 10 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "cost_estimate" + } + } + } + } + arg_attr { + key: 11 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "edge_count" + } + } + } + } + arg_attr { + key: 12 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inlining_default" + } + } + } + } + arg_attr { + key: 13 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "node_count" + } + } + } + } + arg_attr { + key: 14 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "nr_ctant_params" + } + } + } + } + arg_attr { + key: 15 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 16 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 17 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 18 + value { + attr { 
+ key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 19 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 20 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference__traced_restore_4619176" + input_arg { + name: "file_prefix" + type: DT_STRING + } + input_arg { + name: "assignvariableop_train_step" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_1_qnetwork_encodingnetwork_dense_kernel" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_2_qnetwork_encodingnetwork_dense_bias" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_3_qnetwork_encodingnetwork_dense_1_kernel" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_4_qnetwork_encodingnetwork_dense_1_bias" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_5_qnetwork_dense_2_kernel" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_6_qnetwork_dense_2_bias" + type: DT_RESOURCE + } + output_arg { + name: "identity_8" + type: DT_STRING + } + is_stateful: true + control_output: "AssignVariableOp" + control_output: "AssignVariableOp_1" + control_output: "AssignVariableOp_2" + control_output: "AssignVariableOp_3" + control_output: "AssignVariableOp_4" + control_output: "AssignVariableOp_5" + control_output: "AssignVariableOp_6" + control_output: "RestoreV2" + control_output: "RestoreV2_1" + } + node_def { + name: "RestoreV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 7 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 7 + } + } + string_val: "train_step/.ATTRIBUTES/VARIABLE_VALUE" + string_val: 
"model_variables/0/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/1/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/2/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/3/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/4/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "model_variables/5/.ATTRIBUTES/VARIABLE_VALUE" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2/tensor_names" + } + } + node_def { + name: "RestoreV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 7 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 7 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2/shape_and_slices" + } + } + node_def { + name: "RestoreV2" + op: "RestoreV2" + input: "file_prefix" + input: "RestoreV2/tensor_names:output:0" + input: "RestoreV2/shape_and_slices:output:0" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_INT64 + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "RestoreV2:tensors:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } 
+ } + experimental_debug_info { + original_node_names: "Identity" + } + } + node_def { + name: "AssignVariableOp" + op: "AssignVariableOp" + input: "assignvariableop_train_step" + input: "Identity:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp" + } + } + node_def { + name: "Identity_1" + op: "Identity" + input: "RestoreV2:tensors:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_1" + } + } + node_def { + name: "AssignVariableOp_1" + op: "AssignVariableOp" + input: "assignvariableop_1_qnetwork_encodingnetwork_dense_kernel" + input: "Identity_1:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_1" + } + } + node_def { + name: "Identity_2" + op: "Identity" + input: "RestoreV2:tensors:2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_2" + } + } + node_def { + name: "AssignVariableOp_2" + op: "AssignVariableOp" + input: "assignvariableop_2_qnetwork_encodingnetwork_dense_bias" + input: "Identity_2:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_2" + } + } + node_def { + name: "Identity_3" + op: "Identity" + input: "RestoreV2:tensors:3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + 
experimental_debug_info { + original_node_names: "Identity_3" + } + } + node_def { + name: "AssignVariableOp_3" + op: "AssignVariableOp" + input: "assignvariableop_3_qnetwork_encodingnetwork_dense_1_kernel" + input: "Identity_3:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_3" + } + } + node_def { + name: "Identity_4" + op: "Identity" + input: "RestoreV2:tensors:4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_4" + } + } + node_def { + name: "AssignVariableOp_4" + op: "AssignVariableOp" + input: "assignvariableop_4_qnetwork_encodingnetwork_dense_1_bias" + input: "Identity_4:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_4" + } + } + node_def { + name: "Identity_5" + op: "Identity" + input: "RestoreV2:tensors:5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_5" + } + } + node_def { + name: "AssignVariableOp_5" + op: "AssignVariableOp" + input: "assignvariableop_5_qnetwork_dense_2_kernel" + input: "Identity_5:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_5" + } + } + node_def { + name: "Identity_6" + op: "Identity" + input: "RestoreV2:tensors:6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: 
true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_6" + } + } + node_def { + name: "AssignVariableOp_6" + op: "AssignVariableOp" + input: "assignvariableop_6_qnetwork_dense_2_bias" + input: "Identity_6:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_6" + } + } + node_def { + name: "RestoreV2_1/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "_CHECKPOINTABLE_OBJECT_GRAPH" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2_1/tensor_names" + } + } + node_def { + name: "RestoreV2_1/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2_1/shape_and_slices" + } + } + node_def { + name: "RestoreV2_1" + op: "RestoreV2" + input: "file_prefix" + input: "RestoreV2_1/tensor_names:output:0" + input: "RestoreV2_1/shape_and_slices:output:0" + input: "^RestoreV2" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_STRING + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2_1" + } + } + node_def { + name: "NoOp" + op: "NoOp" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + 
list { + } + } + } + experimental_debug_info { + original_node_names: "NoOp" + } + } + node_def { + name: "Identity_7" + op: "Identity" + input: "file_prefix" + input: "^AssignVariableOp" + input: "^AssignVariableOp_1" + input: "^AssignVariableOp_2" + input: "^AssignVariableOp_3" + input: "^AssignVariableOp_4" + input: "^AssignVariableOp_5" + input: "^AssignVariableOp_6" + input: "^NoOp" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_7" + } + } + node_def { + name: "Identity_8" + op: "Identity" + input: "Identity_7:output:0" + input: "^AssignVariableOp" + input: "^AssignVariableOp_1" + input: "^AssignVariableOp_2" + input: "^AssignVariableOp_3" + input: "^AssignVariableOp_4" + input: "^AssignVariableOp_5" + input: "^AssignVariableOp_6" + input: "^RestoreV2" + input: "^RestoreV2_1" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_8" + } + } + ret { + key: "identity_8" + value: "Identity_8:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "AssignVariableOp" + value: "AssignVariableOp" + } + control_ret { + key: "AssignVariableOp_1" + value: "AssignVariableOp_1" + } + control_ret { + key: "AssignVariableOp_2" + value: "AssignVariableOp_2" + } + control_ret { + key: "AssignVariableOp_3" + value: "AssignVariableOp_3" + } + control_ret { + key: "AssignVariableOp_4" + value: "AssignVariableOp_4" + } + control_ret { + key: "AssignVariableOp_5" + value: 
"AssignVariableOp_5" + } + control_ret { + key: "AssignVariableOp_6" + value: "AssignVariableOp_6" + } + control_ret { + key: "RestoreV2" + value: "RestoreV2" + } + control_ret { + key: "RestoreV2_1" + value: "RestoreV2_1" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "file_prefix" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference__728" + input_arg { + name: "readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_INT64 + } + is_stateful: true + } + node_def { + name: "ReadVariableOp" + op: "ReadVariableOp" + input: "readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT64 + } + } + experimental_debug_info { + original_node_names: "ReadVariableOp" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_INT64 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + 
experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + } + versions { + producer: 357 + min_consumer: 12 + } + } + saver_def { + filename_tensor_name: "saver_filename:0" + save_tensor_name: "StatefulPartitionedCall_2:0" + restore_op_name: "StatefulPartitionedCall_3" + version: V2 + } + collection_def { + key: "saved_model_main_op" + value { + node_list { + value: "NoOp" + } + } + } + signature_def { + key: "__saved_model_init_op" + value { + outputs { + key: "__saved_model_init_op" + value { + name: "NoOp" + tensor_shape { + unknown_rank: true + } + } + } + } + } + signature_def { + key: "action" + value { + inputs { + key: "callee_basic_block_count" + value { + name: "action_callee_basic_block_count:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "callee_conditionally_executed_blocks" + value { + name: "action_callee_conditionally_executed_blocks:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "callee_users" + value { + name: "action_callee_users:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "caller_basic_block_count" + value { + name: "action_caller_basic_block_count:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "caller_conditionally_executed_blocks" + value { + name: "action_caller_conditionally_executed_blocks:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "caller_users" + value { + name: "action_caller_users:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "callsite_height" + value { + name: "action_callsite_height:0" + 
dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "cost_estimate" + value { + name: "action_cost_estimate:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "discount" + value { + name: "action_discount:0" + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "edge_count" + value { + name: "action_edge_count:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "inlining_default" + value { + name: "action_inlining_default:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "node_count" + value { + name: "action_node_count:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "nr_ctant_params" + value { + name: "action_nr_ctant_params:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "reward" + value { + name: "action_reward:0" + dtype: DT_FLOAT + tensor_shape { + dim { + size: 1 + } + } + } + } + inputs { + key: "step_type" + value { + name: "action_step_type:0" + dtype: DT_INT32 + tensor_shape { + dim { + size: 1 + } + } + } + } + outputs { + key: "inlining_decision" + value { + name: "StatefulPartitionedCall:0" + dtype: DT_INT64 + tensor_shape { + dim { + size: 1 + } + } + } + } + method_name: "tensorflow/serving/predict" + } + } + signature_def { + key: "get_initial_state" + value { + method_name: "tensorflow/serving/predict" + } + } + signature_def { + key: "get_train_step" + value { + outputs { + key: "int64" + value { + name: "StatefulPartitionedCall_1:0" + dtype: DT_INT64 + tensor_shape { + } + } + } + method_name: "tensorflow/serving/predict" + } + } + object_graph_def { + nodes { + children { + node_id: 1 + local_name: "_time_step_spec" + } + children { + node_id: 2 + local_name: "_trajectory_spec" + } + children { + node_id: 3 + local_name: "_wrapped_policy" + } + children { + node_id: 4 + local_name: 
"train_step" + } + children { + node_id: 5 + local_name: "model_variables" + } + children { + node_id: 6 + local_name: "signatures" + } + children { + node_id: 210 + local_name: "action" + } + children { + node_id: 211 + local_name: "get_initial_state" + } + children { + node_id: 212 + local_name: "get_train_step" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 7 + local_name: "observation" + } + children { + node_id: 7 + local_name: "3" + } + user_object { + identifier: "trackable_tuple_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 7 + local_name: "observation" + } + children { + node_id: 7 + local_name: "1" + } + user_object { + identifier: "trackable_tuple_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 8 + local_name: "_q_network" + } + children { + node_id: 1 + local_name: "_time_step_spec" + } + children { + node_id: 9 + local_name: "_trajectory_spec" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + variable { + dtype: DT_INT64 + shape { + } + name: "train_step" + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + children { + node_id: 14 + local_name: "4" + } + children { + node_id: 15 + local_name: "5" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 213 + local_name: "action" + } + children { + node_id: 214 + local_name: "get_initial_state" + } + children { + node_id: 215 + local_name: "get_train_step" + } + user_object { + identifier: "signature_map" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + 
user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 16 + local_name: "_input_tensor_spec" + } + children { + node_id: 17 + local_name: "_encoder" + } + children { + node_id: 18 + local_name: "_q_value_layer" + } + children { + node_id: 19 + local_name: "variables" + } + children { + node_id: 20 + local_name: "regularization_losses" + } + children { + node_id: 21 + local_name: "trainable_variables" + } + children { + node_id: 22 + local_name: "keras_api" + } + children { + node_id: 216 + local_name: "__call__" + } + children { + node_id: 217 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_network" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"QNetwork\", \"name\": \"QNetwork\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"config\": {\"layer was saved without config\": true}, \"is_graph_network\": false}" + } + } + nodes { + children { + node_id: 7 + local_name: "observation" + } + children { + node_id: 7 + local_name: "1" + } + user_object { + identifier: "trackable_tuple_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 34 + } + dim { + size: 100 + } + } + trainable: true + name: "QNetwork/EncodingNetwork/dense/kernel" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 100 + } + } + trainable: true + name: "QNetwork/EncodingNetwork/dense/bias" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 100 + } + dim { + size: 40 + } + } + trainable: true + name: "QNetwork/EncodingNetwork/dense_1/kernel" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 40 + } + } + trainable: true + name: "QNetwork/EncodingNetwork/dense_1/bias" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { 
+ dim { + size: 40 + } + dim { + size: 2 + } + } + trainable: true + name: "QNetwork/dense_2/kernel" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 2 + } + } + trainable: true + name: "QNetwork/dense_2/bias" + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 23 + local_name: "_input_tensor_spec" + } + children { + node_id: 24 + local_name: "_preprocessing_nest" + } + children { + node_id: 25 + local_name: "_flat_preprocessing_layers" + } + children { + node_id: 26 + local_name: "_preprocessing_combiner" + } + children { + node_id: 27 + local_name: "_postprocessing_layers" + } + children { + node_id: 28 + local_name: "variables" + } + children { + node_id: 29 + local_name: "regularization_losses" + } + children { + node_id: 30 + local_name: "trainable_variables" + } + children { + node_id: 31 + local_name: "keras_api" + } + children { + node_id: 218 + local_name: "__call__" + } + children { + node_id: 219 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_network" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"EncodingNetwork\", \"name\": \"EncodingNetwork\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"config\": {\"layer was saved without config\": true}, \"is_graph_network\": false}" + } + } + nodes { + children { + node_id: 14 + local_name: "kernel" + } + children { + node_id: 15 + local_name: "bias" + } + children { + node_id: 32 + local_name: "variables" + } + children { + node_id: 33 + local_name: "regularization_losses" + } + children { + node_id: 34 + local_name: "trainable_variables" + } + children { + node_id: 35 + local_name: "keras_api" + } + children { + node_id: 220 + local_name: "__call__" + } + children { + node_id: 221 + local_name: 
"call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Dense\", \"name\": \"dense_2\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense_2\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 2, \"activation\": \"linear\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"RandomUniform\", \"config\": {\"minval\": -0.03, \"maxval\": 0.03, \"seed\": null, \"dtype\": \"float32\"}}, \"bias_initializer\": {\"class_name\": \"Constant\", \"config\": {\"value\": -0.2, \"dtype\": \"float32\"}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 40}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [0, 40]}}" + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + children { + node_id: 14 + local_name: "4" + } + children { + node_id: 15 + local_name: "5" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + children { + node_id: 14 + local_name: "4" + } + children { + node_id: 15 + local_name: "5" + } + user_object 
{ + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 36 + local_name: "layer_metrics" + } + children { + node_id: 19 + local_name: "variables" + } + children { + node_id: 37 + local_name: "layer_regularization_losses" + } + children { + node_id: 38 + local_name: "metrics" + } + children { + node_id: 39 + local_name: "layers" + } + children { + node_id: 20 + local_name: "regularization_losses" + } + children { + node_id: 40 + local_name: "non_trainable_variables" + } + children { + node_id: 21 + local_name: "trainable_variables" + } + children { + node_id: 216 + local_name: "__call__" + } + children { + node_id: 217 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 217 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 41 + local_name: "0" + } + children { + node_id: 42 + local_name: "1" + } + children { + node_id: 43 + local_name: "2" + } + children { + node_id: 44 + local_name: "3" + } + children { + node_id: 45 + local_name: "4" + } + children { + node_id: 46 + local_name: "5" + } + children { + node_id: 47 + local_name: "6" + } + children { + node_id: 48 + local_name: "7" + } + children { + node_id: 49 + local_name: "8" + } + children { + node_id: 50 + local_name: "9" + } + children { + node_id: 51 + local_name: "10" + } + children { + node_id: 52 + local_name: "11" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 53 + local_name: "variables" + } + children { + node_id: 54 
+ local_name: "regularization_losses" + } + children { + node_id: 55 + local_name: "trainable_variables" + } + children { + node_id: 56 + local_name: "keras_api" + } + children { + node_id: 222 + local_name: "__call__" + } + children { + node_id: 223 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Concatenate\", \"name\": \"concatenate\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"concatenate\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}, \"build_input_shape\": [{\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 1]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}, {\"class_name\": \"TensorShape\", \"items\": [0, 3]}]}" + } + } + nodes { + children { + node_id: 57 + local_name: "0" + } + children { + node_id: 58 + local_name: "1" + } + children { + node_id: 59 + local_name: "2" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object 
{ + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + children { + node_id: 12 + local_name: "2" + } + children { + node_id: 13 + local_name: "3" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 60 + local_name: "layer_metrics" + } + children { + node_id: 28 + local_name: "variables" + } + children { + node_id: 61 + local_name: "layer_regularization_losses" + } + children { + node_id: 62 + local_name: "metrics" + } + children { + node_id: 63 + local_name: "layers" + } + children { + node_id: 29 + local_name: "regularization_losses" + } + children { + node_id: 64 + local_name: "non_trainable_variables" + } + children { + node_id: 30 + local_name: "trainable_variables" + } + children { + node_id: 218 + local_name: "__call__" + } + children { + node_id: 219 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 219 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 14 + local_name: "0" + } + children { + node_id: 15 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 14 + local_name: "0" + } + children { + node_id: 15 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 65 + local_name: "layer_metrics" + } + children { + node_id: 32 + local_name: "variables" + } + children { + node_id: 66 + local_name: 
"layer_regularization_losses" + } + children { + node_id: 67 + local_name: "metrics" + } + children { + node_id: 68 + local_name: "layers" + } + children { + node_id: 33 + local_name: "regularization_losses" + } + children { + node_id: 69 + local_name: "non_trainable_variables" + } + children { + node_id: 34 + local_name: "trainable_variables" + } + children { + node_id: 220 + local_name: "__call__" + } + children { + node_id: 221 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 221 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 17 + local_name: "0" + } + children { + node_id: 18 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 70 + local_name: "variables" + } + children { + node_id: 71 + local_name: "regularization_losses" + } + children { + node_id: 72 + local_name: "trainable_variables" + } + children { + node_id: 73 + local_name: "keras_api" + } + children { + node_id: 224 + local_name: "__call__" + } + children { + node_id: 225 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda\", \"trainable\": true, \"expects_training_arg\": 
true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 
8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 11.0, 12.0, 13.0, 14.0, 14.0, 14.0, 16.0, 17.0, 19.0, 23.0, 27.0, 39.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 74 + local_name: "variables" + } + children { + node_id: 75 + local_name: "regularization_losses" + } + children { + node_id: 76 + local_name: "trainable_variables" + } + children { + node_id: 77 + local_name: "keras_api" + } + children { + node_id: 226 + local_name: "__call__" + } + children { + node_id: 227 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_1\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_1\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", 
\"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 8.0, 8.0, 8.0, 8.0, 9.0, 10.0, 10.0, 10.0, 12.0, 12.0, 12.0, 14.0, 14.0, 18.0, 20.0, 23.0, 30.0, 41.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 78 + local_name: "variables" + } + children { + node_id: 79 + local_name: "regularization_losses" + } + children { + node_id: 80 + local_name: "trainable_variables" + } + children { + node_id: 81 + local_name: "keras_api" + } + children { + node_id: 228 + local_name: "__call__" + } + children { + node_id: 229 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_2\", \"trainable\": 
true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_2\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 
6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 18.0, 18.0, 18.0, 18.0, 18.0, 19.0, 19.0, 19.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 22.0, 22.0, 22.0, 22.0, 23.0, 23.0, 23.0, 24.0, 24.0, 24.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 26.0, 26.0, 26.0, 27.0, 27.0, 27.0, 27.0, 28.0, 28.0, 29.0, 29.0, 29.0, 29.0, 30.0, 30.0, 31.0, 31.0, 31.0, 31.0, 32.0, 32.0, 33.0, 33.0, 33.0, 34.0, 34.0, 34.0, 34.0, 35.0, 35.0, 36.0, 36.0, 37.0, 37.0, 37.0, 38.0, 38.0, 39.0, 39.0, 40.0, 40.0, 41.0, 41.0, 41.0, 42.0, 43.0, 43.0, 44.0, 44.0, 45.0, 45.0, 46.0, 46.0, 46.0, 47.0, 47.0, 48.0, 49.0, 49.0, 50.0, 50.0, 51.0, 52.0, 53.0, 53.0, 54.0, 55.0, 56.0, 57.0, 57.0, 58.0, 59.0, 60.0, 61.0, 61.0, 63.0, 63.0, 64.0, 65.0, 66.0, 67.0, 67.0, 69.0, 70.0, 71.0, 72.0, 73.0, 74.0, 75.0, 77.0, 78.0, 79.0, 80.0, 81.0, 82.0, 83.0, 85.0, 86.0, 88.0, 89.0, 91.0, 92.0, 94.0, 96.0, 97.0, 99.0, 100.0, 101.0, 103.0, 105.0, 107.0, 109.0, 111.0, 113.0, 115.0, 118.0, 121.0, 123.0, 126.0, 128.0, 130.0, 133.0, 135.0, 137.0, 
140.0, 143.0, 146.0, 148.0, 151.0, 154.0, 157.0, 161.0, 163.0, 166.0, 169.0, 173.0, 178.0, 183.0, 189.0, 193.0, 197.0, 202.0, 208.0, 213.0, 218.0, 223.0, 228.0, 233.0, 239.0, 245.0, 250.0, 257.0, 262.0, 269.0, 277.0, 284.0, 292.0, 300.0, 308.0, 319.0, 329.0, 340.0, 349.0, 359.0, 371.0, 382.0, 394.0, 410.0, 423.0, 435.0, 445.0, 462.0, 480.0, 492.0, 506.0, 519.0, 536.0, 557.0, 577.0, 598.0, 622.0, 655.0, 679.0, 707.0, 733.0, 751.0, 787.0, 814.0, 847.0, 897.0, 934.0, 997.0, 1062.0, 1111.0, 1181.0, 1275.0, 1385.0, 1465.0, 1603.0, 1769.0, 2057.0, 2257.0, 2803.0, 3468.0, 4417.0, 6538.0, 16126.0, 23446.0, 33536.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 82 + local_name: "variables" + } + children { + node_id: 83 + local_name: "regularization_losses" + } + children { + node_id: 84 + local_name: "trainable_variables" + } + children { + node_id: 85 + local_name: "keras_api" + } + children { + node_id: 230 + local_name: "__call__" + } + children { + node_id: 231 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_3\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_3\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": 
[\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 
1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 
4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 23.0, 23.0, 23.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 25.0, 25.0, 25.0, 25.0, 25.0, 26.0, 26.0, 26.0, 26.0, 27.0, 27.0, 27.0, 27.0, 27.0, 28.0, 28.0, 28.0, 29.0, 29.0, 29.0, 29.0, 30.0, 30.0, 30.0, 31.0, 31.0, 31.0, 32.0, 32.0, 32.0, 33.0, 33.0, 33.0, 34.0, 34.0, 34.0, 34.0, 35.0, 35.0, 35.0, 36.0, 36.0, 36.0, 37.0, 37.0, 37.0, 38.0, 38.0, 38.0, 38.0, 39.0, 39.0, 40.0, 40.0, 41.0, 41.0, 42.0, 43.0, 43.0, 44.0, 45.0, 45.0, 46.0, 47.0, 47.0, 48.0, 49.0, 49.0, 50.0, 50.0, 52.0, 52.0, 53.0, 54.0, 55.0, 55.0, 57.0, 58.0, 59.0, 60.0, 62.0, 64.0, 65.0, 66.0, 68.0, 70.0, 70.0, 70.0, 70.0, 70.0, 71.0, 73.0, 75.0, 76.0, 78.0, 81.0, 
84.0, 86.0, 90.0, 94.0, 98.0, 101.0, 106.0, 111.0, 117.0, 123.0, 130.0, 138.0, 146.0, 157.0, 163.0, 176.0, 187.0, 198.0, 214.0, 227.0, 252.0, 280.0, 327.0, 395.0, 506.0, 671.0, 1025.0, 1971.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 86 + local_name: "variables" + } + children { + node_id: 87 + local_name: "regularization_losses" + } + children { + node_id: 88 + local_name: "trainable_variables" + } + children { + node_id: 89 + local_name: "keras_api" + } + children { + node_id: 232 + local_name: "__call__" + } + children { + node_id: 233 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_4\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_4\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": 
[\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 4.0, 4.0, 4.0, 
4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 21.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 24.0, 25.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 26.0, 27.0, 28.0, 28.0, 28.0, 28.0, 28.0, 29.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 31.0, 32.0, 32.0, 32.0, 32.0, 32.0, 34.0, 34.0, 34.0, 34.0, 34.0, 34.0, 35.0, 36.0, 36.0, 36.0, 37.0, 38.0, 38.0, 38.0, 39.0, 40.0, 40.0, 41.0, 42.0, 42.0, 43.0, 44.0, 44.0, 46.0, 46.0, 47.0, 48.0, 48.0, 50.0, 50.0, 52.0, 52.0, 54.0, 55.0, 55.0, 56.0, 57.0, 58.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 62.0, 62.0, 64.0, 65.0, 66.0, 68.0, 70.0, 72.0, 74.0, 77.0, 80.0, 82.0, 86.0, 89.0, 92.0, 
96.0, 99.0, 104.0, 108.0, 114.0, 119.0, 125.0, 131.0, 139.0, 146.0, 157.0, 167.0, 176.0, 188.0, 198.0, 215.0, 236.0, 262.0, 306.0, 376.0, 462.0, 596.0, 942.0, 1428.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 90 + local_name: "variables" + } + children { + node_id: 91 + local_name: "regularization_losses" + } + children { + node_id: 92 + local_name: "trainable_variables" + } + children { + node_id: 93 + local_name: "keras_api" + } + children { + node_id: 234 + local_name: "__call__" + } + children { + node_id: 235 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_5\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_5\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, 
{\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 
3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 11.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0, 18.0, 20.0, 23.0, 29.0, 38.0, 60.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 94 + local_name: "variables" + } + children { + node_id: 95 + local_name: "regularization_losses" + } + children { + node_id: 96 + local_name: "trainable_variables" + } + children { + node_id: 97 + local_name: "keras_api" + } + children { + node_id: 236 + local_name: "__call__" + } + children { + node_id: 237 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": 
\"lambda_6\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_6\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 3.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 4.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 6.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 
7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 7.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 8.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 9.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 11.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 12.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 13.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 14.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 16.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 17.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 18.0, 19.0, 
19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 19.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 21.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 22.0, 23.0, 23.0, 23.0, 23.0, 23.0, 23.0, 23.0, 24.0, 24.0, 24.0, 24.0, 24.0, 25.0, 25.0, 25.0, 25.0, 25.0, 26.0, 26.0, 26.0, 26.0, 27.0, 27.0, 27.0, 28.0, 28.0, 28.0, 29.0, 29.0, 30.0, 30.0, 30.0, 31.0, 31.0, 32.0, 32.0, 33.0, 33.0, 34.0, 35.0, 37.0, 38.0, 40.0, 46.0, 51.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 98 + local_name: "variables" + } + children { + node_id: 99 + local_name: "regularization_losses" + } + children { + node_id: 100 + local_name: "trainable_variables" + } + children { + node_id: 101 + local_name: "keras_api" + } + children { + node_id: 238 + local_name: "__call__" + } + children { + node_id: 239 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_7\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_7\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": 
[\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [-15035.0, -15030.0, -15025.0, -15000.0, -14985.0, -14945.0, -14745.0, -70.0, -55.0, -55.0, -50.0, -50.0, -50.0, -45.0, -45.0, -45.0, -45.0, -45.0, -45.0, -45.0, -45.0, -45.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -40.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, 
-35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -35.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, 
-30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -30.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -25.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -20.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -15.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -10.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, -5.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 
5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 5.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 10.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 15.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 20.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 25.0, 30.0, 30.0, 30.0, 30.0, 30.0, 30.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 35.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 40.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 45.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 50.0, 55.0, 55.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 60.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 65.0, 70.0, 70.0, 70.0, 70.0, 70.0, 70.0, 70.0, 75.0, 75.0, 80.0, 80.0, 80.0, 85.0, 85.0, 85.0, 90.0, 90.0, 90.0, 90.0, 95.0, 95.0, 100.0, 100.0, 105.0, 110.0, 115.0, 120.0, 125.0, 125.0, 130.0, 140.0, 140.0, 145.0, 150.0, 155.0, 160.0, 160.0, 165.0, 170.0, 175.0, 180.0, 190.0, 200.0, 210.0, 215.0, 220.0, 220.0, 230.0, 235.0, 245.0, 250.0, 260.0, 275.0, 290.0, 305.0, 325.0, 350.0, 370.0, 390.0, 425.0, 460.0, 500.0, 560.0, 650.0, 790.0, 1025.0, 1600.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 102 + local_name: "variables" + } + children { + node_id: 103 + local_name: "regularization_losses" + } + children { + node_id: 104 + local_name: "trainable_variables" + } + children { + node_id: 105 + 
local_name: "keras_api" + } + children { + node_id: 240 + local_name: "__call__" + } + children { + node_id: 241 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_8\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_8\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [18.0, 29.0, 39.0, 48.0, 57.0, 64.0, 70.0, 76.0, 82.0, 87.0, 92.0, 97.0, 101.0, 105.0, 109.0, 113.0, 116.0, 120.0, 123.0, 127.0, 130.0, 134.0, 137.0, 140.0, 143.0, 146.0, 149.0, 152.0, 156.0, 159.0, 162.0, 165.0, 168.0, 171.0, 174.0, 177.0, 180.0, 183.0, 186.0, 188.0, 191.0, 194.0, 197.0, 200.0, 203.0, 205.0, 208.0, 211.0, 214.0, 217.0, 219.0, 222.0, 225.0, 228.0, 231.0, 233.0, 236.0, 239.0, 242.0, 244.0, 247.0, 250.0, 253.0, 255.0, 258.0, 261.0, 264.0, 266.0, 269.0, 272.0, 275.0, 
278.0, 280.0, 283.0, 286.0, 289.0, 292.0, 294.0, 297.0, 300.0, 303.0, 305.0, 308.0, 311.0, 314.0, 317.0, 319.0, 322.0, 325.0, 327.0, 330.0, 333.0, 336.0, 339.0, 341.0, 344.0, 347.0, 350.0, 353.0, 355.0, 358.0, 361.0, 364.0, 367.0, 370.0, 373.0, 375.0, 378.0, 381.0, 384.0, 387.0, 390.0, 393.0, 396.0, 399.0, 401.0, 404.0, 407.0, 410.0, 413.0, 416.0, 419.0, 422.0, 425.0, 428.0, 431.0, 434.0, 437.0, 440.0, 443.0, 446.0, 449.0, 452.0, 455.0, 458.0, 461.0, 464.0, 467.0, 470.0, 473.0, 476.0, 479.0, 483.0, 486.0, 489.0, 492.0, 495.0, 498.0, 501.0, 504.0, 507.0, 511.0, 514.0, 517.0, 520.0, 523.0, 526.0, 530.0, 533.0, 536.0, 539.0, 542.0, 545.0, 549.0, 552.0, 555.0, 558.0, 562.0, 565.0, 569.0, 572.0, 575.0, 579.0, 582.0, 585.0, 589.0, 592.0, 595.0, 599.0, 602.0, 605.0, 609.0, 612.0, 616.0, 620.0, 623.0, 626.0, 630.0, 634.0, 637.0, 641.0, 644.0, 648.0, 651.0, 655.0, 658.0, 662.0, 665.0, 669.0, 672.0, 676.0, 680.0, 683.0, 687.0, 691.0, 694.0, 698.0, 702.0, 705.0, 709.0, 712.0, 716.0, 720.0, 724.0, 727.0, 731.0, 735.0, 739.0, 742.0, 746.0, 750.0, 754.0, 758.0, 761.0, 765.0, 769.0, 773.0, 777.0, 780.0, 784.0, 788.0, 792.0, 796.0, 800.0, 804.0, 808.0, 812.0, 816.0, 820.0, 823.0, 828.0, 832.0, 836.0, 840.0, 844.0, 848.0, 852.0, 856.0, 860.0, 864.0, 868.0, 873.0, 877.0, 881.0, 885.0, 889.0, 893.0, 897.0, 902.0, 906.0, 910.0, 914.0, 919.0, 923.0, 927.0, 931.0, 935.0, 940.0, 944.0, 948.0, 953.0, 957.0, 962.0, 966.0, 970.0, 975.0, 979.0, 984.0, 988.0, 993.0, 997.0, 1002.0, 1006.0, 1011.0, 1015.0, 1020.0, 1024.0, 1029.0, 1034.0, 1038.0, 1043.0, 1047.0, 1052.0, 1057.0, 1062.0, 1066.0, 1071.0, 1076.0, 1081.0, 1086.0, 1090.0, 1095.0, 1100.0, 1105.0, 1110.0, 1114.0, 1119.0, 1124.0, 1129.0, 1134.0, 1139.0, 1144.0, 1149.0, 1154.0, 1159.0, 1164.0, 1169.0, 1174.0, 1179.0, 1184.0, 1189.0, 1194.0, 1199.0, 1204.0, 1209.0, 1215.0, 1220.0, 1225.0, 1230.0, 1235.0, 1241.0, 1246.0, 1251.0, 1257.0, 1262.0, 1267.0, 1273.0, 1278.0, 1284.0, 1289.0, 1294.0, 1300.0, 1305.0, 1311.0, 1316.0, 1322.0, 1327.0, 
1333.0, 1338.0, 1344.0, 1350.0, 1355.0, 1361.0, 1367.0, 1372.0, 1378.0, 1383.0, 1389.0, 1395.0, 1401.0, 1407.0, 1413.0, 1418.0, 1424.0, 1430.0, 1436.0, 1442.0, 1448.0, 1454.0, 1459.0, 1465.0, 1472.0, 1477.0, 1483.0, 1489.0, 1495.0, 1501.0, 1507.0, 1514.0, 1520.0, 1526.0, 1532.0, 1538.0, 1545.0, 1551.0, 1557.0, 1564.0, 1570.0, 1576.0, 1583.0, 1589.0, 1596.0, 1602.0, 1608.0, 1615.0, 1621.0, 1628.0, 1634.0, 1641.0, 1647.0, 1654.0, 1661.0, 1667.0, 1674.0, 1681.0, 1687.0, 1694.0, 1701.0, 1708.0, 1715.0, 1722.0, 1729.0, 1735.0, 1742.0, 1749.0, 1756.0, 1763.0, 1770.0, 1777.0, 1784.0, 1791.0, 1798.0, 1806.0, 1812.0, 1820.0, 1827.0, 1835.0, 1841.0, 1849.0, 1856.0, 1863.0, 1871.0, 1878.0, 1885.0, 1893.0, 1901.0, 1908.0, 1915.0, 1923.0, 1930.0, 1938.0, 1946.0, 1953.0, 1961.0, 1969.0, 1976.0, 1984.0, 1992.0, 2000.0, 2007.0, 2015.0, 2023.0, 2031.0, 2039.0, 2047.0, 2055.0, 2063.0, 2071.0, 2079.0, 2087.0, 2095.0, 2104.0, 2112.0, 2120.0, 2128.0, 2137.0, 2146.0, 2154.0, 2162.0, 2171.0, 2179.0, 2188.0, 2197.0, 2205.0, 2214.0, 2223.0, 2232.0, 2241.0, 2250.0, 2258.0, 2268.0, 2277.0, 2285.0, 2294.0, 2304.0, 2313.0, 2322.0, 2331.0, 2340.0, 2350.0, 2359.0, 2368.0, 2378.0, 2388.0, 2397.0, 2407.0, 2416.0, 2426.0, 2436.0, 2446.0, 2455.0, 2465.0, 2475.0, 2485.0, 2495.0, 2505.0, 2515.0, 2525.0, 2535.0, 2545.0, 2556.0, 2566.0, 2577.0, 2587.0, 2598.0, 2609.0, 2620.0, 2631.0, 2641.0, 2652.0, 2663.0, 2674.0, 2685.0, 2696.0, 2708.0, 2719.0, 2730.0, 2742.0, 2753.0, 2764.0, 2776.0, 2788.0, 2799.0, 2811.0, 2823.0, 2835.0, 2847.0, 2858.0, 2870.0, 2882.0, 2894.0, 2906.0, 2919.0, 2931.0, 2943.0, 2956.0, 2968.0, 2981.0, 2994.0, 3006.0, 3019.0, 3032.0, 3045.0, 3058.0, 3070.0, 3083.0, 3096.0, 3109.0, 3121.0, 3134.0, 3148.0, 3161.0, 3174.0, 3187.0, 3200.0, 3214.0, 3228.0, 3242.0, 3255.0, 3268.0, 3283.0, 3297.0, 3310.0, 3325.0, 3340.0, 3353.0, 3368.0, 3383.0, 3398.0, 3412.0, 3427.0, 3442.0, 3457.0, 3471.0, 3487.0, 3502.0, 3516.0, 3531.0, 3546.0, 3561.0, 3577.0, 3593.0, 3608.0, 3625.0, 3641.0, 3657.0, 3673.0, 
3690.0, 3706.0, 3722.0, 3738.0, 3755.0, 3772.0, 3789.0, 3805.0, 3823.0, 3839.0, 3856.0, 3873.0, 3891.0, 3908.0, 3926.0, 3944.0, 3960.0, 3977.0, 3995.0, 4013.0, 4031.0, 4048.0, 4067.0, 4085.0, 4104.0, 4122.0, 4140.0, 4159.0, 4177.0, 4196.0, 4215.0, 4234.0, 4253.0, 4272.0, 4291.0, 4311.0, 4332.0, 4351.0, 4371.0, 4391.0, 4412.0, 4433.0, 4454.0, 4474.0, 4496.0, 4518.0, 4538.0, 4558.0, 4579.0, 4601.0, 4619.0, 4640.0, 4662.0, 4684.0, 4706.0, 4728.0, 4751.0, 4771.0, 4794.0, 4818.0, 4840.0, 4863.0, 4887.0, 4910.0, 4933.0, 4956.0, 4980.0, 5004.0, 5028.0, 5052.0, 5076.0, 5100.0, 5125.0, 5152.0, 5175.0, 5200.0, 5226.0, 5251.0, 5278.0, 5304.0, 5329.0, 5354.0, 5381.0, 5407.0, 5433.0, 5460.0, 5488.0, 5516.0, 5544.0, 5573.0, 5600.0, 5628.0, 5656.0, 5684.0, 5713.0, 5741.0, 5771.0, 5799.0, 5830.0, 5860.0, 5891.0, 5921.0, 5951.0, 5980.0, 6010.0, 6041.0, 6073.0, 6105.0, 6133.0, 6163.0, 6195.0, 6227.0, 6258.0, 6291.0, 6322.0, 6356.0, 6390.0, 6424.0, 6457.0, 6491.0, 6527.0, 6561.0, 6596.0, 6631.0, 6665.0, 6701.0, 6736.0, 6771.0, 6805.0, 6840.0, 6877.0, 6911.0, 6947.0, 6985.0, 7022.0, 7059.0, 7097.0, 7135.0, 7174.0, 7212.0, 7251.0, 7289.0, 7327.0, 7366.0, 7406.0, 7447.0, 7486.0, 7525.0, 7566.0, 7606.0, 7646.0, 7688.0, 7728.0, 7771.0, 7814.0, 7859.0, 7901.0, 7949.0, 7992.0, 8036.0, 8082.0, 8127.0, 8173.0, 8218.0, 8262.0, 8309.0, 8353.0, 8397.0, 8444.0, 8489.0, 8539.0, 8585.0, 8632.0, 8682.0, 8727.0, 8777.0, 8828.0, 8879.0, 8929.0, 8982.0, 9037.0, 9087.0, 9140.0, 9193.0, 9250.0, 9305.0, 9361.0, 9418.0, 9475.0, 9532.0, 9589.0, 9644.0, 9699.0, 9758.0, 9818.0, 9875.0, 9935.0, 9997.0, 10057.0, 10117.0, 10174.0, 10232.0, 10296.0, 10356.0, 10419.0, 10482.0, 10546.0, 10608.0, 10670.0, 10729.0, 10790.0, 10855.0, 10920.0, 10990.0, 11054.0, 11118.0, 11181.0, 11248.0, 11316.0, 11385.0, 11454.0, 11526.0, 11597.0, 11667.0, 11740.0, 11820.0, 11897.0, 11973.0, 12046.0, 12126.0, 12204.0, 12287.0, 12370.0, 12456.0, 12538.0, 12627.0, 12714.0, 12799.0, 12883.0, 12971.0, 13062.0, 13154.0, 13233.0, 13328.0, 
13418.0, 13511.0, 13607.0, 13709.0, 13806.0, 13903.0, 14002.0, 14104.0, 14200.0, 14288.0, 14391.0, 14488.0, 14590.0, 14698.0, 14808.0, 14910.0, 15020.0, 15126.0, 15238.0, 15347.0, 15456.0, 15574.0, 15692.0, 15786.0, 15896.0, 16016.0, 16136.0, 16250.0, 16352.0, 16474.0, 16575.0, 16702.0, 16835.0, 16965.0, 17096.0, 17232.0, 17370.0, 17443.0, 17581.0, 17719.0, 17864.0, 17976.0, 18116.0, 18250.0, 18396.0, 18540.0, 18690.0, 18840.0, 18989.0, 19136.0, 19294.0, 19445.0, 19589.0, 19750.0, 19905.0, 20064.0, 20191.0, 20325.0, 20497.0, 20662.0, 20833.0, 20981.0, 21152.0, 21334.0, 21510.0, 21642.0, 21821.0, 22001.0, 22186.0, 22379.0, 22568.0, 22770.0, 22958.0, 23162.0, 23360.0, 23524.0, 23737.0, 23960.0, 24175.0, 24395.0, 24631.0, 24865.0, 25091.0, 25327.0, 25580.0, 25833.0, 26089.0, 26361.0, 26636.0, 26889.0, 27155.0, 27436.0, 27715.0, 28003.0, 28303.0, 28600.0, 28916.0, 29223.0, 29553.0, 29884.0, 30200.0, 30538.0, 30868.0, 31211.0, 31548.0, 31881.0, 32253.0, 32605.0, 32980.0, 33385.0, 33805.0, 34254.0, 34723.0, 35167.0, 35666.0, 36125.0, 36652.0, 37177.0, 37739.0, 38321.0, 38932.0, 39640.0, 40337.0, 41000.0, 41626.0, 42385.0, 43122.0, 43890.0, 44687.0, 45609.0, 46520.0, 47489.0, 48432.0, 49458.0, 50511.0, 51561.0, 52568.0, 53676.0, 54936.0, 56071.0, 57302.0, 58513.0, 59800.0, 61192.0, 62702.0, 64205.0, 65868.0, 67780.0, 69960.0, 72330.0, 74918.0, 77540.0, 80344.0, 83727.0, 87662.0, 93589.0, 101441.0, 110544.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 106 + local_name: "variables" + } + children { + node_id: 107 + local_name: "regularization_losses" + } + children { + node_id: 108 + local_name: "trainable_variables" + } + children { + node_id: 109 + local_name: "keras_api" + } + children { + node_id: 242 + local_name: "__call__" + 
} + children { + node_id: 243 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_9\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_9\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAgAAAAQAAAATAAAAcxgAAACIAHwAgwF9AXQAagF8AXQAagJkAY0CUwApAk4pAdoF\\nZHR5cGUpA9oCdGbaCnplcm9zX2xpa2XaB2Zsb2F0MzIpAtoDb2Jz2gxleHBhbmRlZF9vYnMpAdoO\\nZXhwYW5kX2RpbXNfb3CpAPr0L2V4cG9ydC9oZGEzL2JvcmdsZXQvbG9jYWxfcmFtX2ZzX2RpcnMv\\nMC55dW5kaV9tdXBwZXRfMF8xMjI3MDgzMy4xMy55dW5kaS4xOTQ3MzE0MTc5NjEuOGY0ZjlmOThj\\nYjdhMzA1NS9idWlsZF90YXJnZXRfdHJhaW5fcGFyX2Q5NzU3NTM3MDE2YTJlYjgvdHJhaW4ucGFy\\nL2dvb2dsZTMvbGVhcm5pbmcvc21hcnRjaG9pY2VzL3Jlc2VhcmNoL2NsaWVudHMvY29tcGlsZXJf\\nb3B0L3BvbGljeV90cmFpbmluZy9mZWF0dXJlX29wcy5wedoPZGlzY2FyZF9mZWF0dXJlJwAAAHME\\nAAAAAAEIAQ==\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 110 + local_name: "variables" + } + children { + node_id: 111 + local_name: "regularization_losses" + } + children { + node_id: 112 + local_name: "trainable_variables" + } + children { + node_id: 113 + local_name: "keras_api" + } + children { + node_id: 244 + local_name: "__call__" + } + children { + node_id: 245 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { 
+ producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_10\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_10\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [13.0, 38.0, 56.0, 70.0, 82.0, 94.0, 104.0, 114.0, 123.0, 131.0, 139.0, 148.0, 152.0, 153.0, 158.0, 163.0, 170.0, 174.0, 178.0, 180.0, 183.0, 186.0, 188.0, 190.0, 192.0, 196.0, 198.0, 201.0, 205.0, 208.0, 212.0, 215.0, 219.0, 221.0, 225.0, 227.0, 229.0, 232.0, 233.0, 236.0, 239.0, 242.0, 245.0, 248.0, 250.0, 252.0, 254.0, 256.0, 259.0, 261.0, 264.0, 267.0, 270.0, 272.0, 275.0, 278.0, 280.0, 283.0, 285.0, 287.0, 290.0, 293.0, 295.0, 297.0, 300.0, 303.0, 305.0, 308.0, 311.0, 313.0, 316.0, 319.0, 322.0, 325.0, 329.0, 331.0, 333.0, 336.0, 338.0, 340.0, 343.0, 345.0, 347.0, 347.0, 349.0, 351.0, 353.0, 355.0, 357.0, 359.0, 361.0, 363.0, 365.0, 368.0, 369.0, 371.0, 373.0, 375.0, 377.0, 380.0, 382.0, 385.0, 387.0, 
389.0, 391.0, 394.0, 396.0, 398.0, 400.0, 403.0, 405.0, 408.0, 410.0, 412.0, 415.0, 417.0, 420.0, 422.0, 425.0, 427.0, 429.0, 432.0, 434.0, 437.0, 439.0, 442.0, 444.0, 446.0, 449.0, 451.0, 454.0, 456.0, 458.0, 461.0, 463.0, 466.0, 469.0, 472.0, 474.0, 476.0, 479.0, 482.0, 483.0, 486.0, 489.0, 492.0, 495.0, 498.0, 500.0, 503.0, 505.0, 508.0, 510.0, 513.0, 516.0, 519.0, 522.0, 524.0, 528.0, 530.0, 533.0, 536.0, 539.0, 541.0, 544.0, 547.0, 550.0, 553.0, 556.0, 559.0, 561.0, 563.0, 567.0, 570.0, 572.0, 575.0, 577.0, 580.0, 584.0, 586.0, 589.0, 592.0, 595.0, 598.0, 601.0, 605.0, 607.0, 611.0, 613.0, 617.0, 620.0, 623.0, 626.0, 629.0, 632.0, 635.0, 639.0, 642.0, 645.0, 648.0, 651.0, 654.0, 657.0, 660.0, 662.0, 666.0, 669.0, 672.0, 676.0, 679.0, 682.0, 685.0, 688.0, 690.0, 693.0, 696.0, 699.0, 702.0, 705.0, 709.0, 712.0, 714.0, 718.0, 721.0, 724.0, 726.0, 728.0, 729.0, 731.0, 734.0, 737.0, 741.0, 745.0, 748.0, 750.0, 753.0, 756.0, 760.0, 763.0, 766.0, 770.0, 773.0, 776.0, 779.0, 782.0, 786.0, 788.0, 793.0, 796.0, 798.0, 802.0, 805.0, 808.0, 811.0, 815.0, 818.0, 820.0, 824.0, 827.0, 829.0, 832.0, 835.0, 838.0, 842.0, 846.0, 849.0, 854.0, 857.0, 860.0, 864.0, 867.0, 871.0, 875.0, 879.0, 882.0, 887.0, 890.0, 893.0, 897.0, 901.0, 905.0, 908.0, 911.0, 915.0, 918.0, 921.0, 925.0, 929.0, 932.0, 934.0, 937.0, 940.0, 943.0, 946.0, 950.0, 953.0, 956.0, 961.0, 965.0, 969.0, 973.0, 976.0, 980.0, 982.0, 985.0, 990.0, 994.0, 997.0, 1001.0, 1005.0, 1007.0, 1010.0, 1014.0, 1018.0, 1022.0, 1025.0, 1028.0, 1033.0, 1035.0, 1038.0, 1042.0, 1047.0, 1052.0, 1056.0, 1060.0, 1063.0, 1067.0, 1071.0, 1075.0, 1079.0, 1083.0, 1086.0, 1088.0, 1092.0, 1097.0, 1102.0, 1106.0, 1109.0, 1113.0, 1117.0, 1120.0, 1125.0, 1129.0, 1134.0, 1137.0, 1142.0, 1146.0, 1150.0, 1151.0, 1155.0, 1159.0, 1162.0, 1166.0, 1170.0, 1174.0, 1177.0, 1181.0, 1185.0, 1188.0, 1193.0, 1196.0, 1203.0, 1207.0, 1212.0, 1214.0, 1217.0, 1220.0, 1222.0, 1222.0, 1226.0, 1229.0, 1233.0, 1237.0, 1241.0, 1246.0, 1250.0, 1253.0, 1257.0, 
1262.0, 1267.0, 1272.0, 1278.0, 1283.0, 1287.0, 1293.0, 1297.0, 1301.0, 1304.0, 1309.0, 1315.0, 1320.0, 1325.0, 1329.0, 1333.0, 1336.0, 1341.0, 1344.0, 1348.0, 1351.0, 1357.0, 1363.0, 1368.0, 1374.0, 1379.0, 1383.0, 1386.0, 1391.0, 1395.0, 1399.0, 1403.0, 1407.0, 1410.0, 1415.0, 1418.0, 1423.0, 1428.0, 1432.0, 1436.0, 1438.0, 1442.0, 1446.0, 1450.0, 1454.0, 1462.0, 1467.0, 1472.0, 1477.0, 1483.0, 1488.0, 1492.0, 1496.0, 1503.0, 1508.0, 1513.0, 1518.0, 1520.0, 1526.0, 1531.0, 1534.0, 1538.0, 1542.0, 1546.0, 1552.0, 1558.0, 1564.0, 1568.0, 1573.0, 1578.0, 1581.0, 1590.0, 1596.0, 1601.0, 1606.0, 1611.0, 1616.0, 1622.0, 1629.0, 1634.0, 1640.0, 1647.0, 1651.0, 1657.0, 1660.0, 1665.0, 1672.0, 1678.0, 1686.0, 1692.0, 1698.0, 1704.0, 1709.0, 1714.0, 1719.0, 1724.0, 1730.0, 1737.0, 1744.0, 1751.0, 1755.0, 1761.0, 1764.0, 1772.0, 1778.0, 1784.0, 1789.0, 1799.0, 1804.0, 1811.0, 1819.0, 1825.0, 1830.0, 1838.0, 1849.0, 1858.0, 1862.0, 1868.0, 1872.0, 1878.0, 1885.0, 1888.0, 1892.0, 1897.0, 1902.0, 1907.0, 1919.0, 1926.0, 1932.0, 1936.0, 1941.0, 1946.0, 1952.0, 1960.0, 1968.0, 1977.0, 1985.0, 1992.0, 1997.0, 2006.0, 2012.0, 2018.0, 2026.0, 2034.0, 2044.0, 2050.0, 2057.0, 2064.0, 2069.0, 2075.0, 2082.0, 2091.0, 2098.0, 2107.0, 2122.0, 2126.0, 2135.0, 2146.0, 2149.0, 2157.0, 2163.0, 2172.0, 2178.0, 2184.0, 2191.0, 2198.0, 2208.0, 2216.0, 2223.0, 2235.0, 2242.0, 2252.0, 2263.0, 2272.0, 2277.0, 2288.0, 2296.0, 2306.0, 2311.0, 2318.0, 2323.0, 2334.0, 2341.0, 2356.0, 2366.0, 2373.0, 2379.0, 2386.0, 2407.0, 2416.0, 2423.0, 2432.0, 2438.0, 2448.0, 2453.0, 2464.0, 2473.0, 2473.0, 2481.0, 2492.0, 2504.0, 2511.0, 2523.0, 2529.0, 2537.0, 2545.0, 2556.0, 2566.0, 2575.0, 2584.0, 2592.0, 2602.0, 2613.0, 2624.0, 2636.0, 2643.0, 2647.0, 2652.0, 2664.0, 2675.0, 2688.0, 2693.0, 2702.0, 2709.0, 2722.0, 2739.0, 2754.0, 2766.0, 2776.0, 2786.0, 2799.0, 2810.0, 2832.0, 2840.0, 2849.0, 2860.0, 2873.0, 2889.0, 2908.0, 2914.0, 2926.0, 2939.0, 2950.0, 2961.0, 2969.0, 2978.0, 2990.0, 2999.0, 3023.0, 3032.0, 
3049.0, 3066.0, 3085.0, 3101.0, 3107.0, 3117.0, 3129.0, 3144.0, 3167.0, 3190.0, 3212.0, 3229.0, 3238.0, 3264.0, 3293.0, 3302.0, 3309.0, 3314.0, 3323.0, 3344.0, 3352.0, 3362.0, 3390.0, 3400.0, 3411.0, 3435.0, 3456.0, 3470.0, 3485.0, 3498.0, 3505.0, 3519.0, 3539.0, 3545.0, 3545.0, 3560.0, 3576.0, 3597.0, 3607.0, 3621.0, 3641.0, 3665.0, 3679.0, 3701.0, 3714.0, 3733.0, 3741.0, 3745.0, 3757.0, 3773.0, 3787.0, 3795.0, 3805.0, 3822.0, 3835.0, 3844.0, 3861.0, 3872.0, 3878.0, 3897.0, 3919.0, 3941.0, 3971.0, 4004.0, 4014.0, 4019.0, 4061.0, 4068.0, 4089.0, 4108.0, 4117.0, 4125.0, 4146.0, 4165.0, 4194.0, 4204.0, 4224.0, 4236.0, 4263.0, 4290.0, 4301.0, 4319.0, 4326.0, 4347.0, 4369.0, 4386.0, 4413.0, 4435.0, 4451.0, 4451.0, 4451.0, 4476.0, 4500.0, 4539.0, 4579.0, 4592.0, 4600.0, 4622.0, 4650.0, 4683.0, 4714.0, 4742.0, 4755.0, 4771.0, 4788.0, 4816.0, 4828.0, 4831.0, 4831.0, 4831.0, 4843.0, 4852.0, 4865.0, 4896.0, 4915.0, 4931.0, 4952.0, 4965.0, 4983.0, 5007.0, 5043.0, 5061.0, 5081.0, 5095.0, 5122.0, 5143.0, 5171.0, 5204.0, 5226.0, 5233.0, 5250.0, 5281.0, 5320.0, 5323.0, 5328.0, 5345.0, 5374.0, 5413.0, 5466.0, 5492.0, 5524.0, 5555.0, 5567.0, 5610.0, 5676.0, 5701.0, 5716.0, 5744.0, 5768.0, 5795.0, 5818.0, 5854.0, 5906.0, 5934.0, 5960.0, 5975.0, 5993.0, 6025.0, 6034.0, 6051.0, 6082.0, 6106.0, 6125.0, 6159.0, 6187.0, 6242.0, 6287.0, 6311.0, 6332.0, 6348.0, 6358.0, 6368.0, 6377.0, 6402.0, 6407.0, 6428.0, 6450.0, 6475.0, 6498.0, 6505.0, 6533.0, 6565.0, 6580.0, 6595.0, 6611.0, 6654.0, 6658.0, 6705.0, 6751.0, 6786.0, 6828.0, 6876.0, 6896.0, 6948.0, 6964.0, 7065.0, 7082.0, 7118.0, 7184.0, 7214.0, 7271.0, 7310.0, 7357.0, 7405.0, 7506.0, 7613.0, 7641.0, 7675.0, 7720.0, 7781.0, 7833.0, 7860.0, 7898.0, 7929.0, 8044.0, 8104.0, 8148.0, 8236.0, 8273.0, 8313.0, 8349.0, 8381.0, 8409.0, 8498.0, 8507.0, 8524.0, 8570.0, 8607.0, 8630.0, 8637.0, 8675.0, 8700.0, 8714.0, 8734.0, 8776.0, 8836.0, 8854.0, 8867.0, 8868.0, 9065.0, 9113.0, 9121.0, 9241.0, 9357.0, 9360.0, 9585.0, 9613.0, 9684.0, 9727.0, 9751.0, 
9777.0, 9802.0, 9889.0, 9903.0, 9914.0, 9978.0, 10061.0, 10192.0, 10213.0, 10345.0, 10369.0, 10404.0, 10430.0, 10471.0, 10481.0, 10489.0, 10492.0, 10494.0, 10524.0, 10554.0, 10557.0, 10560.0, 10562.0, 10641.0, 10716.0, 10842.0, 10897.0, 10967.0, 11053.0, 11128.0, 11137.0, 11328.0, 11336.0, 11401.0, 11532.0, 11573.0, 11860.0, 11880.0, 12013.0, 12305.0, 12358.0, 12386.0, 12404.0, 12456.0, 12456.0, 12476.0, 12615.0, 12677.0, 12981.0, 13094.0, 13197.0, 13708.0, 13717.0, 13788.0, 14049.0, 14112.0, 14224.0, 14257.0, 14681.0, 14901.0, 15006.0, 15071.0, 15100.0, 15248.0, 15669.0, 15877.0, 15953.0, 15953.0, 16066.0, 16072.0, 16271.0, 16292.0, 16386.0, 16490.0, 16633.0, 16670.0, 16834.0, 16896.0, 17543.0, 17693.0, 17800.0, 17859.0, 18397.0, 18811.0, 18826.0, 18971.0, 19304.0, 19319.0, 19695.0, 20378.0, 20865.0, 21313.0, 21330.0, 22321.0, 22760.0, 22770.0, 23783.0, 23785.0, 24525.0, 24844.0, 24848.0, 24964.0, 24966.0, 27468.0, 27478.0, 27555.0, 27555.0, 28215.0, 28219.0, 28336.0, 28490.0, 30213.0, 30228.0, 30242.0, 34116.0, 43518.0, 43518.0, 43518.0, 43852.0, 43852.0, 43852.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + children { + node_id: 114 + local_name: "variables" + } + children { + node_id: 115 + local_name: "regularization_losses" + } + children { + node_id: 116 + local_name: "trainable_variables" + } + children { + node_id: 117 + local_name: "keras_api" + } + children { + node_id: 246 + local_name: "__call__" + } + children { + node_id: 247 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Lambda\", \"name\": \"lambda_11\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", 
\"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"lambda_11\", \"trainable\": true, \"dtype\": \"float32\", \"function\": {\"class_name\": \"__tuple__\", \"items\": [\"4wEAAAAAAAAAAwAAAAUAAAATAAAAc0QAAACIAHwAgwF9AXQAagF0AmoDfAGIAYMCdABqBIMCdAWI\\nAYMBGwB9AnQAagZ8AnQAagd8AoMBfAJ8AhQAZwNkA2QCjQJTACkETukBAAAAKQHaBGF4aXPp////\\n/ykI2gJ0ZtoEY2FzdNoOY29udHJpYl9sYXllcnPaCWJ1Y2tldGl6ZdoHZmxvYXQzMtoDbGVu2gZj\\nb25jYXTaBHNxcnQpA9oDb2Jz2gxleHBhbmRlZF9vYnPaAXgpAtoOZXhwYW5kX2RpbXNfb3DaCHF1\\nYW50aWxlqQD69C9leHBvcnQvaGRhMy9ib3JnbGV0L2xvY2FsX3JhbV9mc19kaXJzLzAueXVuZGlf\\nbXVwcGV0XzBfMTIyNzA4MzMuMTMueXVuZGkuMTk0NzMxNDE3OTYxLjhmNGY5Zjk4Y2I3YTMwNTUv\\nYnVpbGRfdGFyZ2V0X3RyYWluX3Bhcl9kOTc1NzUzNzAxNmEyZWI4L3RyYWluLnBhci9nb29nbGUz\\nL2xlYXJuaW5nL3NtYXJ0Y2hvaWNlcy9yZXNlYXJjaC9jbGllbnRzL2NvbXBpbGVyX29wdC9wb2xp\\nY3lfdHJhaW5pbmcvZmVhdHVyZV9vcHMucHnaDW5vcm1hbGl6YXRpb24wAAAAcwoAAAAAAQgBBAEK\\nARAB\\n\", null, {\"class_name\": \"__tuple__\", \"items\": [{\"class_name\": \"ExpandDims\", \"config\": {\"name\": \"expand_dims\", \"trainable\": true, \"dtype\": \"float32\", \"axis\": -1}}, [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 
0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 
2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 2.0, 3.0, 4.0]]}]}, \"function_type\": \"lambda\", \"module\": \"google3.learning.smartchoices.research.clients.compiler_opt.policy_training.feature_ops\", \"output_shape\": null, \"output_shape_type\": \"raw\", \"output_shape_module\": null, \"arguments\": {}}}" + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 118 + local_name: "layer_metrics" + } + children { + node_id: 53 + local_name: "variables" + } + children { + node_id: 119 + local_name: "layer_regularization_losses" + } + children { + node_id: 120 + local_name: "metrics" + } + children { + node_id: 121 + local_name: "layers" + } + children { + node_id: 54 + local_name: "regularization_losses" + } + children { + node_id: 122 + local_name: "non_trainable_variables" + } + children { + node_id: 55 + local_name: "trainable_variables" + } + children { + node_id: 222 + local_name: "__call__" + } + children { + node_id: 223 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 223 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 123 + local_name: "variables" + } + children { + node_id: 124 + local_name: "regularization_losses" + } + children { + node_id: 125 + local_name: "trainable_variables" + } + children { + node_id: 126 + local_name: "keras_api" + } + children { + node_id: 248 + local_name: "__call__" + } + children { + node_id: 249 + local_name: "call_and_return_all_conditional_losses" + } + user_object { 
+ identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Flatten\", \"name\": \"flatten\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"flatten\", \"trainable\": true, \"dtype\": \"float32\", \"data_format\": \"channels_last\"}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 1, \"axes\": {}}}}" + } + } + nodes { + children { + node_id: 10 + local_name: "kernel" + } + children { + node_id: 11 + local_name: "bias" + } + children { + node_id: 127 + local_name: "variables" + } + children { + node_id: 128 + local_name: "regularization_losses" + } + children { + node_id: 129 + local_name: "trainable_variables" + } + children { + node_id: 130 + local_name: "keras_api" + } + children { + node_id: 250 + local_name: "__call__" + } + children { + node_id: 251 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Dense\", \"name\": \"dense\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 100, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"VarianceScaling\", \"config\": {\"scale\": 2.0, \"mode\": \"fan_in\", \"distribution\": \"truncated_normal\", \"seed\": null, \"dtype\": \"float32\"}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, 
\"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 34}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [0, 34]}}" + } + } + nodes { + children { + node_id: 12 + local_name: "kernel" + } + children { + node_id: 13 + local_name: "bias" + } + children { + node_id: 131 + local_name: "variables" + } + children { + node_id: 132 + local_name: "regularization_losses" + } + children { + node_id: 133 + local_name: "trainable_variables" + } + children { + node_id: 134 + local_name: "keras_api" + } + children { + node_id: 252 + local_name: "__call__" + } + children { + node_id: 253 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Dense\", \"name\": \"dense_1\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense_1\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 40, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"VarianceScaling\", \"config\": {\"scale\": 2.0, \"mode\": \"fan_in\", \"distribution\": \"truncated_normal\", \"seed\": null, \"dtype\": \"float32\"}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 100}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [0, 100]}}" + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + 
producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 41 + local_name: "0" + } + children { + node_id: 42 + local_name: "1" + } + children { + node_id: 43 + local_name: "2" + } + children { + node_id: 44 + local_name: "3" + } + children { + node_id: 45 + local_name: "4" + } + children { + node_id: 46 + local_name: "5" + } + children { + node_id: 47 + local_name: "6" + } + children { + node_id: 48 + local_name: "7" + } + children { + node_id: 49 + local_name: "8" + } + children { + node_id: 50 + local_name: "9" + } + children { + node_id: 51 + local_name: "10" + } + children { + node_id: 52 + local_name: "11" + } + children { + node_id: 26 + local_name: "12" + } + children { + node_id: 57 + local_name: "13" + } + children { + node_id: 58 + local_name: "14" + } + children { + node_id: 59 + local_name: "15" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + 
producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 135 + local_name: "layer_metrics" + } + children { + node_id: 70 + local_name: "variables" + } + children { + node_id: 136 + local_name: "layer_regularization_losses" + } + children { + node_id: 137 + local_name: "metrics" + } + children { + node_id: 138 + local_name: "layers" + } + children { + node_id: 71 + local_name: "regularization_losses" + } + children { + node_id: 139 + local_name: "non_trainable_variables" + } + children { + node_id: 72 + local_name: "trainable_variables" + } + children { + node_id: 224 + local_name: "__call__" + } + children { + node_id: 225 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 225 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 140 + local_name: "layer_metrics" + } + children { + node_id: 74 + local_name: "variables" + } + children { + node_id: 141 + local_name: "layer_regularization_losses" + } + children { + node_id: 142 + local_name: "metrics" + } + children { + node_id: 143 + local_name: "layers" + } + children { + node_id: 75 + local_name: "regularization_losses" + } + children { + node_id: 144 + local_name: "non_trainable_variables" + } + children { + node_id: 76 + local_name: "trainable_variables" + } + children { + node_id: 226 + local_name: "__call__" + } + children { + node_id: 227 + 
local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 227 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 145 + local_name: "layer_metrics" + } + children { + node_id: 78 + local_name: "variables" + } + children { + node_id: 146 + local_name: "layer_regularization_losses" + } + children { + node_id: 147 + local_name: "metrics" + } + children { + node_id: 148 + local_name: "layers" + } + children { + node_id: 79 + local_name: "regularization_losses" + } + children { + node_id: 149 + local_name: "non_trainable_variables" + } + children { + node_id: 80 + local_name: "trainable_variables" + } + children { + node_id: 228 + local_name: "__call__" + } + children { + node_id: 229 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 229 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 150 + local_name: "layer_metrics" + } + children { + node_id: 82 + local_name: "variables" + } + children { + node_id: 151 + 
local_name: "layer_regularization_losses" + } + children { + node_id: 152 + local_name: "metrics" + } + children { + node_id: 153 + local_name: "layers" + } + children { + node_id: 83 + local_name: "regularization_losses" + } + children { + node_id: 154 + local_name: "non_trainable_variables" + } + children { + node_id: 84 + local_name: "trainable_variables" + } + children { + node_id: 230 + local_name: "__call__" + } + children { + node_id: 231 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 231 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 155 + local_name: "layer_metrics" + } + children { + node_id: 86 + local_name: "variables" + } + children { + node_id: 156 + local_name: "layer_regularization_losses" + } + children { + node_id: 157 + local_name: "metrics" + } + children { + node_id: 158 + local_name: "layers" + } + children { + node_id: 87 + local_name: "regularization_losses" + } + children { + node_id: 159 + local_name: "non_trainable_variables" + } + children { + node_id: 88 + local_name: "trainable_variables" + } + children { + node_id: 232 + local_name: "__call__" + } + children { + node_id: 233 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 233 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version 
{ + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 160 + local_name: "layer_metrics" + } + children { + node_id: 90 + local_name: "variables" + } + children { + node_id: 161 + local_name: "layer_regularization_losses" + } + children { + node_id: 162 + local_name: "metrics" + } + children { + node_id: 163 + local_name: "layers" + } + children { + node_id: 91 + local_name: "regularization_losses" + } + children { + node_id: 164 + local_name: "non_trainable_variables" + } + children { + node_id: 92 + local_name: "trainable_variables" + } + children { + node_id: 234 + local_name: "__call__" + } + children { + node_id: 235 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 235 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 165 + local_name: "layer_metrics" + } + children { + node_id: 94 + local_name: "variables" + } + children { + node_id: 166 + local_name: "layer_regularization_losses" + } + children { + node_id: 167 + local_name: "metrics" + } + children { + node_id: 168 + local_name: "layers" + } + children { + node_id: 95 + local_name: "regularization_losses" + } + children { + node_id: 169 + local_name: "non_trainable_variables" + } + children { + node_id: 96 + 
local_name: "trainable_variables" + } + children { + node_id: 236 + local_name: "__call__" + } + children { + node_id: 237 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 237 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 170 + local_name: "layer_metrics" + } + children { + node_id: 98 + local_name: "variables" + } + children { + node_id: 171 + local_name: "layer_regularization_losses" + } + children { + node_id: 172 + local_name: "metrics" + } + children { + node_id: 173 + local_name: "layers" + } + children { + node_id: 99 + local_name: "regularization_losses" + } + children { + node_id: 174 + local_name: "non_trainable_variables" + } + children { + node_id: 100 + local_name: "trainable_variables" + } + children { + node_id: 238 + local_name: "__call__" + } + children { + node_id: 239 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 239 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 175 
+ local_name: "layer_metrics" + } + children { + node_id: 102 + local_name: "variables" + } + children { + node_id: 176 + local_name: "layer_regularization_losses" + } + children { + node_id: 177 + local_name: "metrics" + } + children { + node_id: 178 + local_name: "layers" + } + children { + node_id: 103 + local_name: "regularization_losses" + } + children { + node_id: 179 + local_name: "non_trainable_variables" + } + children { + node_id: 104 + local_name: "trainable_variables" + } + children { + node_id: 240 + local_name: "__call__" + } + children { + node_id: 241 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 241 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 180 + local_name: "layer_metrics" + } + children { + node_id: 106 + local_name: "variables" + } + children { + node_id: 181 + local_name: "layer_regularization_losses" + } + children { + node_id: 182 + local_name: "metrics" + } + children { + node_id: 183 + local_name: "layers" + } + children { + node_id: 107 + local_name: "regularization_losses" + } + children { + node_id: 184 + local_name: "non_trainable_variables" + } + children { + node_id: 108 + local_name: "trainable_variables" + } + children { + node_id: 242 + local_name: "__call__" + } + children { + node_id: 243 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 243 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + 
version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 185 + local_name: "layer_metrics" + } + children { + node_id: 110 + local_name: "variables" + } + children { + node_id: 186 + local_name: "layer_regularization_losses" + } + children { + node_id: 187 + local_name: "metrics" + } + children { + node_id: 188 + local_name: "layers" + } + children { + node_id: 111 + local_name: "regularization_losses" + } + children { + node_id: 189 + local_name: "non_trainable_variables" + } + children { + node_id: 112 + local_name: "trainable_variables" + } + children { + node_id: 244 + local_name: "__call__" + } + children { + node_id: 245 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 245 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 190 + local_name: "layer_metrics" + } + children { + node_id: 114 + local_name: "variables" + } + children { + node_id: 191 + local_name: "layer_regularization_losses" + } + children { + node_id: 192 + local_name: "metrics" + } + children { + node_id: 193 + local_name: "layers" + } + children { + node_id: 115 + 
local_name: "regularization_losses" + } + children { + node_id: 194 + local_name: "non_trainable_variables" + } + children { + node_id: 116 + local_name: "trainable_variables" + } + children { + node_id: 246 + local_name: "__call__" + } + children { + node_id: 247 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 247 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 195 + local_name: "layer_metrics" + } + children { + node_id: 123 + local_name: "variables" + } + children { + node_id: 196 + local_name: "layer_regularization_losses" + } + children { + node_id: 197 + local_name: "metrics" + } + children { + node_id: 198 + local_name: "layers" + } + children { + node_id: 124 + local_name: "regularization_losses" + } + children { + node_id: 199 + local_name: "non_trainable_variables" + } + children { + node_id: 125 + local_name: 
"trainable_variables" + } + children { + node_id: 248 + local_name: "__call__" + } + children { + node_id: 249 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 249 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 10 + local_name: "0" + } + children { + node_id: 11 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 200 + local_name: "layer_metrics" + } + children { + node_id: 127 + local_name: "variables" + } + children { + node_id: 201 + local_name: "layer_regularization_losses" + } + children { + node_id: 202 + local_name: "metrics" + } + children { + node_id: 203 + local_name: "layers" + } + children { + node_id: 128 + local_name: "regularization_losses" + } + children { + node_id: 204 + local_name: "non_trainable_variables" + } + children { + node_id: 129 + local_name: "trainable_variables" + } + children { + node_id: 250 + local_name: "__call__" + } + children { + node_id: 251 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 251 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 12 + local_name: "0" + } + children { + node_id: 13 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } 
+ } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 12 + local_name: "0" + } + children { + node_id: 13 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 205 + local_name: "layer_metrics" + } + children { + node_id: 131 + local_name: "variables" + } + children { + node_id: 206 + local_name: "layer_regularization_losses" + } + children { + node_id: 207 + local_name: "metrics" + } + children { + node_id: 208 + local_name: "layers" + } + children { + node_id: 132 + local_name: "regularization_losses" + } + children { + node_id: 209 + local_name: "non_trainable_variables" + } + children { + node_id: 133 + local_name: "trainable_variables" + } + children { + node_id: 252 + local_name: "__call__" + } + children { + node_id: 253 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 253 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + 
version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { 
+ user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + 
version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { 
+ user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + 
version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + function { + concrete_functions: "__inference_polymorphic_action_fn_4619080" + concrete_functions: "__inference_polymorphic_action_fn_946" + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "time_step" + } + values { + string_value: "policy_state" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + tuple_value { + values { + tuple_value { + } + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + concrete_functions: "__inference_function_722" + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + input_signature { + none_value { + } + } + } + } + } + nodes { + bare_concrete_function { + concrete_function_name: "__inference__728" + } + } + nodes { + 
bare_concrete_function { + concrete_function_name: "__inference_signature_wrapper_4619026" + argument_keywords: "callee_basic_block_count" + argument_keywords: "callee_conditionally_executed_blocks" + argument_keywords: "callee_users" + argument_keywords: "caller_basic_block_count" + argument_keywords: "caller_conditionally_executed_blocks" + argument_keywords: "caller_users" + argument_keywords: "callsite_height" + argument_keywords: "cost_estimate" + argument_keywords: "discount" + argument_keywords: "edge_count" + argument_keywords: "inlining_default" + argument_keywords: "node_count" + argument_keywords: "nr_ctant_params" + argument_keywords: "reward" + argument_keywords: "step_type" + } + } + nodes { + bare_concrete_function { + concrete_function_name: "__inference_signature_wrapper_4619033" + } + } + nodes { + bare_concrete_function { + concrete_function_name: "__inference_signature_wrapper_4619048" + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "observation" + } + values { + string_value: "step_type" + } + values { + string_value: "network_state" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + tuple_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + 
values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "observation" + } + values { + string_value: "step_type" + } + values { + string_value: "network_state" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + tuple_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "observation" + } + values { + string_value: "step_type" + } + values { + string_value: "network_state" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + tuple_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + 
string_value: "self" + } + values { + string_value: "observation" + } + values { + string_value: "step_type" + } + values { + string_value: "network_state" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + tuple_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + 
value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + 
string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + 
} + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + 
} + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values 
{ + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" 
+ value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { 
+ key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { 
+ fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + 
string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + 
} + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + 
} + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values 
{ + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "mask" + } + values { + string_value: "training" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + none_value { + } + } + values { + bool_value: false + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value 
{ + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + 
key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + concrete_functions { + key: "__inference__728" + value { + bound_inputs: 4 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + } + } + } + } + output_signature { + tensor_spec_value { + shape { + } + dtype: DT_INT64 + } + } + } + } + concrete_functions { + key: "__inference_function_722" + value { + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + } + } + } + } + output_signature { + tuple_value { + } + } + } + } + concrete_functions { + key: "__inference_polymorphic_action_fn_4619080" + value { + bound_inputs: 10 + bound_inputs: 11 + bound_inputs: 12 + bound_inputs: 13 + bound_inputs: 14 + bound_inputs: 15 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + named_tuple_value { + name: "TimeStep" + values { + key: "step_type" + value { + tensor_spec_value { 
+ name: "time_step/step_type" + shape { + dim { + size: 1 + } + } + dtype: DT_INT32 + } + } + } + values { + key: "reward" + value { + tensor_spec_value { + name: "time_step/reward" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + values { + key: "discount" + value { + tensor_spec_value { + name: "time_step/discount" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + values { + key: "observation" + value { + dict_value { + fields { + key: "callee_basic_block_count" + value { + tensor_spec_value { + name: "time_step/observation/callee_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "time_step/observation/callee_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_users" + value { + tensor_spec_value { + name: "time_step/observation/callee_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_basic_block_count" + value { + tensor_spec_value { + name: "time_step/observation/caller_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "time_step/observation/caller_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_users" + value { + tensor_spec_value { + name: "time_step/observation/caller_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callsite_height" + value { + tensor_spec_value { + name: "time_step/observation/callsite_height" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "cost_estimate" + value { + tensor_spec_value { + name: "time_step/observation/cost_estimate" + shape { + dim { + size: 1 + } + } + dtype: 
DT_INT64 + } + } + } + fields { + key: "edge_count" + value { + tensor_spec_value { + name: "time_step/observation/edge_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "inlining_default" + value { + tensor_spec_value { + name: "time_step/observation/inlining_default" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "node_count" + value { + tensor_spec_value { + name: "time_step/observation/node_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "nr_ctant_params" + value { + tensor_spec_value { + name: "time_step/observation/nr_ctant_params" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + } + } + } + } + } + values { + tuple_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + named_tuple_value { + name: "PolicyStep" + values { + key: "action" + value { + tensor_spec_value { + name: "action" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + values { + key: "state" + value { + tuple_value { + } + } + } + values { + key: "info" + value { + tuple_value { + } + } + } + } + } + } + } + concrete_functions { + key: "__inference_polymorphic_action_fn_946" + value { + bound_inputs: 10 + bound_inputs: 11 + bound_inputs: 12 + bound_inputs: 13 + bound_inputs: 14 + bound_inputs: 15 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + named_tuple_value { + name: "TimeStep" + values { + key: "step_type" + value { + tensor_spec_value { + name: "step_type" + shape { + dim { + size: 1 + } + } + dtype: DT_INT32 + } + } + } + values { + key: "reward" + value { + tensor_spec_value { + name: "reward" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + values { + key: "discount" + value { + tensor_spec_value { + name: "discount" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + values { + key: "observation" + value { + dict_value 
{ + fields { + key: "callee_basic_block_count" + value { + tensor_spec_value { + name: "callee_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "callee_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_users" + value { + tensor_spec_value { + name: "callee_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_basic_block_count" + value { + tensor_spec_value { + name: "caller_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "caller_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_users" + value { + tensor_spec_value { + name: "caller_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callsite_height" + value { + tensor_spec_value { + name: "callsite_height" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "cost_estimate" + value { + tensor_spec_value { + name: "cost_estimate" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "edge_count" + value { + tensor_spec_value { + name: "edge_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "inlining_default" + value { + tensor_spec_value { + name: "inlining_default" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "node_count" + value { + tensor_spec_value { + name: "node_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "nr_ctant_params" + value { + tensor_spec_value { + name: "nr_ctant_params" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } 
+ } + } + } + } + } + } + } + values { + tuple_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + named_tuple_value { + name: "PolicyStep" + values { + key: "action" + value { + tensor_spec_value { + name: "action" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + values { + key: "state" + value { + tuple_value { + } + } + } + values { + key: "info" + value { + tuple_value { + } + } + } + } + } + } + } + concrete_functions { + key: "__inference_signature_wrapper_4619026" + value { + bound_inputs: 10 + bound_inputs: 11 + bound_inputs: 12 + bound_inputs: 13 + bound_inputs: 14 + bound_inputs: 15 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + fields { + key: "callee_basic_block_count" + value { + tensor_spec_value { + name: "callee_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "callee_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callee_users" + value { + tensor_spec_value { + name: "callee_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_basic_block_count" + value { + tensor_spec_value { + name: "caller_basic_block_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_conditionally_executed_blocks" + value { + tensor_spec_value { + name: "caller_conditionally_executed_blocks" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "caller_users" + value { + tensor_spec_value { + name: "caller_users" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "callsite_height" + value { + tensor_spec_value { + name: "callsite_height" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + 
fields { + key: "cost_estimate" + value { + tensor_spec_value { + name: "cost_estimate" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "discount" + value { + tensor_spec_value { + name: "discount" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + fields { + key: "edge_count" + value { + tensor_spec_value { + name: "edge_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "inlining_default" + value { + tensor_spec_value { + name: "inlining_default" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "node_count" + value { + tensor_spec_value { + name: "node_count" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "nr_ctant_params" + value { + tensor_spec_value { + name: "nr_ctant_params" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + fields { + key: "reward" + value { + tensor_spec_value { + name: "reward" + shape { + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + fields { + key: "step_type" + value { + tensor_spec_value { + name: "step_type" + shape { + dim { + size: 1 + } + } + dtype: DT_INT32 + } + } + } + } + } + } + } + output_signature { + dict_value { + fields { + key: "inlining_decision" + value { + tensor_spec_value { + name: "inlining_decision" + shape { + dim { + size: 1 + } + } + dtype: DT_INT64 + } + } + } + } + } + } + } + concrete_functions { + key: "__inference_signature_wrapper_4619033" + value { + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + } + } + } + } + output_signature { + dict_value { + } + } + } + } + concrete_functions { + key: "__inference_signature_wrapper_4619048" + value { + bound_inputs: 4 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + } + } + } + } + output_signature { + dict_value { + fields { + key: "int64" + value { 
+ tensor_spec_value { + name: "int64" + shape { + } + dtype: DT_INT64 + } + } + } + } + } + } + } + } +} + diff --git a/llvm/lib/Analysis/models/inliner/variables/variables.data-00001-of-00002 b/llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00001 similarity index 77% rename from llvm/lib/Analysis/models/inliner/variables/variables.data-00001-of-00002 rename to llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00001 index 1f1f1b151a71ff4d41aa8b5c1c1107364654a2e3..ee7d7060867e7a36b36ae8d6af62937c0f4cb169 100644 GIT binary patch delta 8691 zcmb`NcYGYh700y#$3nU>4q|$p!g(Rd<^G`m!*}c1+*`3)vecXdD-?Bw;i&`Nf&tCifz zPQF^IYzn^Zg6;6qsY$qT`ir(Z3&`*G6WKm)1K4doovXbE5^v z!;9L9?zv;dTy>&?pJ_kLAbqyy6gJpiZqi{_*`-mYW^c$bNf)o0p|=4LZAtx?tbQUs zX(rL_Q|zKsohnr}s?66V=J&&_x+YbN4wDsn=h~swD~Fb^8n)LCty(k8^YttWSqJtS z8W~x=a`~E(;o}CxTl789_6*S$rnmV2wrTo& zAcWX%u{=?=t4`4?RRYgvpo@U*6!YlUicVlI=|mq)<_Z&zUC3?1?>b-}nHf5KAwFIgL)RIssE_4D}r_*SJcQ>f~ zX|V^D6ibtyy^gGjIc#3+(W5Tq=AvQsvZN=s=$l(HJ$j_+(G9(%%Pv=(a;21aJP*SR z7YMJa?*s!+KC`PL`q zo2kyXok=v^sJ|+9zz4*9>rcs;+>s#q7r%OBSCIj}qF@Lg_?JJpJtXQ0AL*9Uu$-rx%PdN&I`NM_V; z)?rW5UdT~gGCe(Ek?e&Jk?tZcnzD70t)S5v_5Ifmd!;*!E%vlUUyF1y-~Aw15f1_( z{rcdLv>+JfgMfaNML{hHl$qNLhb9FBegu!cijRNg8qo_!CZX>lkD7isDis|${rC@Xy8;Ky4II$oNAJ;M^d1w7 z-bU~qD>tJ9R*S)VT+}KWFE&};jEKr>)XHmB%IiesWEyRW&Qd8;UfF`PwRGt-uu0vl@!CD`X)P2-ZOZjy z3*a10uHFgf;$y;w%!1D}DgW7s{O3e@mxk@&0{FaA{=A6%`Ax_Nz3h`3cBvO=@x3c3 zO=#_0z=R96TVwtfJRQ%?hmgiqfjDMy$HF39Ev5oWv@NtF|eAe1FRAc5EP9vx8WEAB^J? 
z9EWgRisNz|>qk3*5#dkj=f_q_eh9^n3D>2j$Mr&be2q^JW#wd0_q_Rje;G0TD%8tPlHJk;vOc(N238YZbnQ)uHNaM$H z8Ms}5yn}~S)`JGzsf1*VifWNsux+}KcS#_H;?aa}36L_zy8}jC|FDyv1^4if%F@$- zdlMlQg{cMKRxwiUl)0Xjez<-|0x8s~CVV#)@_WKWexGNo6}gi6H{b_}kct}Ef*-0N zRg6E<2#uYooW$2FCj2-R@+Si1eLSSHo;KkAfRQTBRTSm61wWlGb{A#+84@w|~Lf?dkq>Nnsmw{gkkiX#} zm0f}XzfFWx^bi*O&S#{_FMfoS{JjKH=qXJ2gA_6>z%uZ#!1xH|A^na-){q$RXdsHZ1m^$ZxUv)K{DZ$5SdEfOlII!f%)$| zv%0-9;2(+1(dG-goZ$S$f`2x}%+6)`6YSvVlK;l{!D|v9i0zmO{|bCSor*1gTakf( z3qJS{?*n-=$GRp1{+sB7NT=6$|EXK>dec6@H`wg78sA{oPpkQFd>_0a@qyUf?*`#}Zwvm{ln?l*e{a9L@9p+!-2E=H(N1pw delta 9 QcmX@Mk!is@#tjPP02jstPyhe` diff --git a/llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00002 b/llvm/lib/Analysis/models/inliner/variables/variables.data-00000-of-00002 deleted file mode 100644 index 58ebd0fc987114a4f1b631cb46adbd9641467207..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 7051 zcmd6s`FGS*6vqkDa(Qj1@JcDnL>fgz5oxC_yIaAkD58|Mh+CXaLkXQ`h9oT(7u;~g zec#1>!v(>8-}mMCgTMI4cr&5&y-e=Q!E-#v({tvvnauZo?pyBtPP%*9p9}W>#YR{I zXuRI^?P1IGe5+)7B`eQn!el<(^xfPxEAJQGvG8gG9EG>;aC4=SHDH#C1v@{+QZR)x zywi5dpGUnaUO!wMunOjA&b4#>1VczRt7N0g`xjE0wm2Ac24sxqve&%mjA5L^jS3qey zgX4b>U>;Tr>N-{-y(qHSEv>2{2 zOdW=bG%XfxkbkyF#*oXTrrwvjn>sH8XX-U{i)3h8*}B`V=b?C;brGGtv9uI+ELAIvgQ!2S)eqBqneJaS)?^f zl;-xRrYmcDw5CsKj)P7UZprTwQ z&&1XAOk6|F#I@o~451lff*GngY=(A;k+qlwooLYl`a?13QoAOI*bI0p5&dlmeZPoK+C3fKQKA!OP(VKr zgDx3E0}dvlzbm1?C!&+CQiu1I=!EAK&_9Slm;9#zA10!IB%yySqLb!ThfkE~gtZmW zKaD|`Y_0*HC8B>Wp?@KwlO9=zFO}$ogAUOrUdxu?tC%DthibsriAj7TCGoA81Zi(| z_)eJwVSa<>7vl4q@L7O(m@bU`C2pj4bnKA^hfW=p2X-r@?{mH|qFCZgs zq+evjz?kln0+iBF*paKzP-|)tcVeo5jEHfW83O|gr;*7kMvVoGraXUS_X)^In3Nea zFlGj50M#=o+<48XaUh?`v_5885YQIWGIIvTtO6#589+C3m~K^8_G3`xE-6aP%!@BB zNiERVa}0@dVszmYVgO1ns}{6k00Kr34llTW4aQ+O@M+-&21e#mhQ&;9vHsoMO)Ho< cwleWgPGl6FT)@Z%!ki4@zZ<$$O5JY*0Kd;lC;$Ke delta 310 zcmey#^p8p481sh+1_m|`CMhNkMg|$4i2@4T8r|lm36CnLGw4W6v{$NUby^VS=o9M7 
z!6?Kb#KfTl#3>St3LF{>vP|1OczFe6L=E-z9795aJe|VK85mf2jZIc$X-r@?{mH|q zFCZgnRA$1!m>rV>)TjZ}xFc7iq1MzQ?!;6988Kshy}BF*1`S3Ni&u;q3m8p#{>bhV zkdZRcFEV9d%#F|xs%PS00_wW)no;8blWBd-vLK*^re&54jJFs~V`jW#)Zj4Ps;unC zpvqfPl$eW k;9~r@zh|#y;@HK+4-x``$q|f_Tnr51zZ<$$O5JY*02b0nP5=M^ From 87f8a4f9a2314044bf007e0d4564a40386751842 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 12 Jul 2020 17:22:15 -0400 Subject: [PATCH 127/771] AMDGPU/GlobalISel: Add tests for 96-bit add/sub/mul I almost regressed these, so add tests for them. --- .../AMDGPU/GlobalISel/legalize-add.mir | 42 +++++++++ .../AMDGPU/GlobalISel/legalize-mul.mir | 90 +++++++++++++++++++ .../AMDGPU/GlobalISel/legalize-sub.mir | 42 +++++++++ 3 files changed, 174 insertions(+) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir index 42a3f0547343d..175144958cd99 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-add.mir @@ -507,3 +507,45 @@ body: | # %5:_(s64) = G_ANYEXT %4 # $vgpr0_vgpr1 = COPY %5 # ... 
+ +--- +name: test_add_s96 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + + ; GFX6-LABEL: name: test_add_s96 + ; GFX6: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV3]] + ; GFX6: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV4]], [[UADDO1]] + ; GFX6: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] + ; GFX6: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX8-LABEL: name: test_add_s96 + ; GFX8: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX8: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV3]] + ; GFX8: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV4]], [[UADDO1]] + ; GFX8: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] + ; GFX8: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX9-LABEL: name: test_add_s96 + ; GFX9: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) 
+ ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[UV]], [[UV3]] + ; GFX9: [[UADDE:%[0-9]+]]:_(s32), [[UADDE1:%[0-9]+]]:_(s1) = G_UADDE [[UV1]], [[UV4]], [[UADDO1]] + ; GFX9: [[UADDE2:%[0-9]+]]:_(s32), [[UADDE3:%[0-9]+]]:_(s1) = G_UADDE [[UV2]], [[UV5]], [[UADDE1]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[UADDO]](s32), [[UADDE]](s32), [[UADDE2]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(s96) = G_ADD %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir index a1499050f831c..63db6ec0d0b34 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-mul.mir @@ -561,3 +561,93 @@ body: | # %5:_(s64) = G_ANYEXT %4 # $vgpr0_vgpr1 = COPY %5 # ... 
+ +--- +name: test_mul_s96 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + + ; GFX6-LABEL: name: test_mul_s96 + ; GFX6: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX6: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX6: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX6: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] + ; GFX6: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX6: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX6: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX6: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX6: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] + ; GFX6: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] + ; GFX6: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] + ; GFX6: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX6: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] + ; GFX6: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; GFX6: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] + ; GFX6: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] + ; GFX6: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] + ; GFX6: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] + ; GFX6: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX8-LABEL: name: test_mul_s96 + ; GFX8: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX8: 
[[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX8: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX8: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV3]] + ; GFX8: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX8: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] + ; GFX8: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX8: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] + ; GFX8: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX8: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX8: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX8: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX8: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] + ; GFX8: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] + ; GFX8: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] + ; GFX8: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX8: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] + ; GFX8: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; GFX8: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] + ; GFX8: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] + ; GFX8: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] + ; GFX8: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] + ; GFX8: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX9-LABEL: name: test_mul_s96 + ; GFX9: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX9: [[MUL:%[0-9]+]]:_(s32) = G_MUL [[UV]], 
[[UV3]] + ; GFX9: [[MUL1:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV3]] + ; GFX9: [[MUL2:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV4]] + ; GFX9: [[UMULH:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV3]] + ; GFX9: [[UADDO:%[0-9]+]]:_(s32), [[UADDO1:%[0-9]+]]:_(s1) = G_UADDO [[MUL1]], [[MUL2]] + ; GFX9: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO1]](s1) + ; GFX9: [[UADDO2:%[0-9]+]]:_(s32), [[UADDO3:%[0-9]+]]:_(s1) = G_UADDO [[UADDO]], [[UMULH]] + ; GFX9: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDO3]](s1) + ; GFX9: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[ZEXT]], [[ZEXT1]] + ; GFX9: [[MUL3:%[0-9]+]]:_(s32) = G_MUL [[UV2]], [[UV3]] + ; GFX9: [[MUL4:%[0-9]+]]:_(s32) = G_MUL [[UV1]], [[UV4]] + ; GFX9: [[MUL5:%[0-9]+]]:_(s32) = G_MUL [[UV]], [[UV5]] + ; GFX9: [[UMULH1:%[0-9]+]]:_(s32) = G_UMULH [[UV1]], [[UV3]] + ; GFX9: [[UMULH2:%[0-9]+]]:_(s32) = G_UMULH [[UV]], [[UV4]] + ; GFX9: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[MUL3]], [[MUL4]] + ; GFX9: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[ADD1]], [[MUL5]] + ; GFX9: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[ADD2]], [[UMULH1]] + ; GFX9: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[ADD3]], [[UMULH2]] + ; GFX9: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[ADD4]], [[ADD]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[MUL]](s32), [[UADDO2]](s32), [[ADD5]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(s96) = G_MUL %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir index 8cb346a761882..3fb34a4edc923 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-sub.mir @@ -501,3 +501,45 @@ body: | # %5:_(s64) = G_ANYEXT %4 # $vgpr0_vgpr1 = COPY %5 # ... 
+ +--- +name: test_sub_s96 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2, $vgpr3_vgpr4_vgpr5 + + ; GFX6-LABEL: name: test_sub_s96 + ; GFX6: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX6: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX6: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV3]] + ; GFX6: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV4]], [[USUBO1]] + ; GFX6: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV5]], [[USUBE1]] + ; GFX6: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32), [[USUBE2]](s32) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX8-LABEL: name: test_sub_s96 + ; GFX8: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) + ; GFX8: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX8: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV3]] + ; GFX8: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV4]], [[USUBO1]] + ; GFX8: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV5]], [[USUBE1]] + ; GFX8: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32), [[USUBE2]](s32) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + ; GFX9-LABEL: name: test_sub_s96 + ; GFX9: [[COPY:%[0-9]+]]:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[COPY1:%[0-9]+]]:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](s96) 
+ ; GFX9: [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](s96) + ; GFX9: [[USUBO:%[0-9]+]]:_(s32), [[USUBO1:%[0-9]+]]:_(s1) = G_USUBO [[UV]], [[UV3]] + ; GFX9: [[USUBE:%[0-9]+]]:_(s32), [[USUBE1:%[0-9]+]]:_(s1) = G_USUBE [[UV1]], [[UV4]], [[USUBO1]] + ; GFX9: [[USUBE2:%[0-9]+]]:_(s32), [[USUBE3:%[0-9]+]]:_(s1) = G_USUBE [[UV2]], [[UV5]], [[USUBE1]] + ; GFX9: [[MV:%[0-9]+]]:_(s96) = G_MERGE_VALUES [[USUBO]](s32), [[USUBE]](s32), [[USUBE2]](s32) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[MV]](s96) + %0:_(s96) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(s96) = COPY $vgpr3_vgpr4_vgpr5 + %2:_(s96) = G_SUB %0, %1 + $vgpr0_vgpr1_vgpr2 = COPY %2 +... From fdb69539bcd250f6e4f49197c9b8149a7542e3ff Mon Sep 17 00:00:00 2001 From: Jan Korous Date: Wed, 8 Jul 2020 15:16:21 -0700 Subject: [PATCH 128/771] [AST] Fix potential nullptr dereference in Expr::HasSideEffects Array returned by LambdaExpr::capture_inits() can contain nullptrs. Differential Revision: https://reviews.llvm.org/D83438 --- clang/include/clang/AST/ExprCXX.h | 1 + clang/lib/AST/Expr.cpp | 2 +- clang/unittests/AST/CMakeLists.txt | 1 + clang/unittests/AST/HasSideEffectsTest.cpp | 86 ++++++++++++++++++++++ 4 files changed, 89 insertions(+), 1 deletion(-) create mode 100644 clang/unittests/AST/HasSideEffectsTest.cpp diff --git a/clang/include/clang/AST/ExprCXX.h b/clang/include/clang/AST/ExprCXX.h index 178f4db770618..6f0b68479b9d7 100644 --- a/clang/include/clang/AST/ExprCXX.h +++ b/clang/include/clang/AST/ExprCXX.h @@ -1931,6 +1931,7 @@ class LambdaExpr final : public Expr, /// Const iterator that walks over the capture initialization /// arguments. + /// FIXME: This interface is prone to being used incorrectly. using const_capture_init_iterator = Expr *const *; /// Retrieve the initialization expressions for this lambda's captures. 
diff --git a/clang/lib/AST/Expr.cpp b/clang/lib/AST/Expr.cpp index 343a271c33944..399e7e13c4459 100644 --- a/clang/lib/AST/Expr.cpp +++ b/clang/lib/AST/Expr.cpp @@ -3629,7 +3629,7 @@ bool Expr::HasSideEffects(const ASTContext &Ctx, case LambdaExprClass: { const LambdaExpr *LE = cast(this); for (Expr *E : LE->capture_inits()) - if (E->HasSideEffects(Ctx, IncludePossibleEffects)) + if (E && E->HasSideEffects(Ctx, IncludePossibleEffects)) return true; return false; } diff --git a/clang/unittests/AST/CMakeLists.txt b/clang/unittests/AST/CMakeLists.txt index 2e750ac9ea925..185995d5b5a27 100644 --- a/clang/unittests/AST/CMakeLists.txt +++ b/clang/unittests/AST/CMakeLists.txt @@ -26,6 +26,7 @@ add_clang_unittest(ASTTests DeclTest.cpp EvaluateAsRValueTest.cpp ExternalASTSourceTest.cpp + HasSideEffectsTest.cpp NamedDeclPrinterTest.cpp RecursiveASTVisitorTest.cpp SizelessTypesTest.cpp diff --git a/clang/unittests/AST/HasSideEffectsTest.cpp b/clang/unittests/AST/HasSideEffectsTest.cpp new file mode 100644 index 0000000000000..842afd8d7a9c3 --- /dev/null +++ b/clang/unittests/AST/HasSideEffectsTest.cpp @@ -0,0 +1,86 @@ +//===- unittest/AST/HasSideEffectsTest.cpp --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "clang/AST/RecursiveASTVisitor.h" +#include "clang/AST/ASTConsumer.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/Attr.h" +#include "clang/Frontend/FrontendAction.h" +#include "clang/Tooling/Tooling.h" +#include "llvm/ADT/FunctionExtras.h" +#include "llvm/ADT/STLExtras.h" +#include "gmock/gmock.h" +#include "gtest/gtest.h" +#include + +using namespace clang; + +namespace { +class ProcessASTAction : public clang::ASTFrontendAction { +public: + ProcessASTAction(llvm::unique_function Process) + : Process(std::move(Process)) { + assert(this->Process); + } + + std::unique_ptr CreateASTConsumer(CompilerInstance &CI, + StringRef InFile) { + class Consumer : public ASTConsumer { + public: + Consumer(llvm::function_ref Process) + : Process(Process) {} + + void HandleTranslationUnit(ASTContext &Ctx) override { Process(Ctx); } + + private: + llvm::function_ref Process; + }; + + return std::make_unique(Process); + } + +private: + llvm::unique_function Process; +}; + +class RunHasSideEffects + : public RecursiveASTVisitor { +public: + RunHasSideEffects(ASTContext& Ctx) + : Ctx(Ctx) {} + + bool VisitLambdaExpr(LambdaExpr *LE) { + LE->HasSideEffects(Ctx); + return true; + } + + ASTContext& Ctx; +}; +} // namespace + +TEST(HasSideEffectsTest, All) { + llvm::StringRef Code = R"cpp( +void Test() { + int msize = 4; + float arr[msize]; + [&arr] {}; +} + )cpp"; + + ASSERT_NO_FATAL_FAILURE( + clang::tooling::runToolOnCode( + std::make_unique( + [&](clang::ASTContext &Ctx) { + RunHasSideEffects Visitor(Ctx); + Visitor.TraverseAST(Ctx); + } + ), + Code) + ); + +} From c1efd6675a901e8c0033026752c05396a3b26fec Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 13 Jul 2020 18:10:37 +0000 Subject: [PATCH 129/771] [gn build] Port fdb69539bcd --- llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn | 1 + 1 file changed, 
1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn b/llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn index f25ead00165c0..e8050f469f046 100644 --- a/llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn +++ b/llvm/utils/gn/secondary/clang/unittests/AST/BUILD.gn @@ -31,6 +31,7 @@ unittest("ASTTests") { "DeclTest.cpp", "EvaluateAsRValueTest.cpp", "ExternalASTSourceTest.cpp", + "HasSideEffectsTest.cpp", "NamedDeclPrinterTest.cpp", "RecursiveASTVisitorTest.cpp", "SizelessTypesTest.cpp", From fefe7555e97685d61f76d4e7dfcabb163ff9bcdf Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Mon, 13 Jul 2020 11:10:56 -0700 Subject: [PATCH 130/771] [NewPM][opt] Translate -foo-analysis to require Fixes 53 check-llvm tests under NPM. Reviewed By: asbirlea Differential Revision: https://reviews.llvm.org/D83633 --- llvm/include/llvm/Passes/PassBuilder.h | 3 +++ llvm/lib/Passes/PassBuilder.cpp | 17 +++++++++++++++++ llvm/tools/opt/NewPMDriver.cpp | 7 +++++-- 3 files changed, 25 insertions(+), 2 deletions(-) diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index f9b0d939e5f8c..0357e4a2fc058 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -518,6 +518,9 @@ class PassBuilder { /// Returns true if the pass name is the name of an alias analysis pass. bool isAAPassName(StringRef PassName); + /// Returns true if the pass name is the name of a (non-alias) analysis pass. 
+ bool isAnalysisPassName(StringRef PassName); + /// Register a callback for a default optimizer pipeline extension /// point /// diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 7f57634676956..537d300fee557 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -2666,3 +2666,20 @@ bool PassBuilder::isAAPassName(StringRef PassName) { #include "PassRegistry.def" return false; } + +bool PassBuilder::isAnalysisPassName(StringRef PassName) { +#define MODULE_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#define FUNCTION_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#define LOOP_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#define CGSSC_ANALYSIS(NAME, CREATE_PASS) \ + if (PassName == NAME) \ + return true; +#include "PassRegistry.def" + return false; +} diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index 8f8ca352dcfff..b94c58decdda2 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -358,8 +358,11 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, } } for (auto PassName : NonAAPasses) { - if (auto Err = - PB.parsePassPipeline(MPM, PassName, VerifyEachPass, DebugPM)) { + std::string ModifiedPassName(PassName.begin(), PassName.end()); + if (PB.isAnalysisPassName(PassName)) + ModifiedPassName = "require<" + ModifiedPassName + ">"; + if (auto Err = PB.parsePassPipeline(MPM, ModifiedPassName, VerifyEachPass, + DebugPM)) { errs() << Arg0 << ": " << toString(std::move(Err)) << "\n"; return false; } From 62881fda5852b9a5960c3f8c26e864d5085d11bb Mon Sep 17 00:00:00 2001 From: serge-sans-paille Date: Thu, 9 Jul 2020 09:56:26 +0200 Subject: [PATCH 131/771] Fix HexagonGenExtract return status Differential Revision: https://reviews.llvm.org/D83460 --- llvm/lib/Target/Hexagon/HexagonGenExtract.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) 
diff --git a/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp b/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp index 342ca21525c5e..d9307190ae169 100644 --- a/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp +++ b/llvm/lib/Target/Hexagon/HexagonGenExtract.cpp @@ -221,15 +221,16 @@ bool HexagonGenExtract::convert(Instruction *In) { } bool HexagonGenExtract::visitBlock(BasicBlock *B) { + bool Changed = false; + // Depth-first, bottom-up traversal. for (auto *DTN : children(DT->getNode(B))) - visitBlock(DTN->getBlock()); + Changed |= visitBlock(DTN->getBlock()); // Allow limiting the number of generated extracts for debugging purposes. bool HasCutoff = ExtractCutoff.getPosition(); unsigned Cutoff = ExtractCutoff; - bool Changed = false; BasicBlock::iterator I = std::prev(B->end()), NextI, Begin = B->begin(); while (true) { if (HasCutoff && (ExtractCount >= Cutoff)) From 9cafbf8f66c9596d8b31293830d8892db0837745 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Mon, 13 Jul 2020 14:40:04 -0400 Subject: [PATCH 132/771] [NFC] Remove unused header include Differential Revision: https://reviews.llvm.org/D83706 --- llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h index a41d4b0777f82..9f2cdf0292a73 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h @@ -23,7 +23,6 @@ #include "llvm/Support/MathExtras.h" #include #include -#include namespace llvm { namespace orc { From 9d3e9a3e3c10cf7ff961df8e107c7cb0e1bc447c Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Mon, 13 Jul 2020 20:44:03 +0200 Subject: [PATCH 133/771] [clangd] Remove const_cast. 
NFC --- clang-tools-extra/clangd/CompileCommands.cpp | 3 +-- clang-tools-extra/clangd/Config.h | 4 ++-- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index 0b27e0e3e8284..4b69555769423 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -183,8 +183,7 @@ CommandMangler CommandMangler::forTests() { } void CommandMangler::adjust(std::vector &Cmd) const { - // FIXME: remove const_cast once unique_function is const-compatible. - for (auto &Edit : const_cast(Config::current()).CompileFlags.Edits) + for (auto &Edit : Config::current().CompileFlags.Edits) Edit(Cmd); // Check whether the flag exists, either as -flag or -flag=* diff --git a/clang-tools-extra/clangd/Config.h b/clang-tools-extra/clangd/Config.h index 878c9e8549b52..d2c3ef37abd5c 100644 --- a/clang-tools-extra/clangd/Config.h +++ b/clang-tools-extra/clangd/Config.h @@ -52,8 +52,8 @@ struct Config { /// Controls how the compile command for the current file is determined. struct { // Edits to apply to the compile command, in sequence. - // FIXME: these functions need to be const-callable. For now, const_cast. 
- std::vector &)>> Edits; + std::vector &) const>> + Edits; } CompileFlags; }; From c0ee2d74680cc89ca6301613f1268de47e3d3bfc Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 12 Jul 2020 12:49:04 -0400 Subject: [PATCH 134/771] AMDGPU/GlobalISel: Add baseline add/sub sat legalization tests --- .../AMDGPU/GlobalISel/legalize-saddsat.mir | 375 ++++++++++++++++++ .../AMDGPU/GlobalISel/legalize-ssubsat.mir | 375 ++++++++++++++++++ .../AMDGPU/GlobalISel/legalize-uaddsat.mir | 375 ++++++++++++++++++ .../AMDGPU/GlobalISel/legalize-usubsat.mir | 375 ++++++++++++++++++ 4 files changed, 1500 insertions(+) create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir create mode 100644 llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir new file mode 100644 index 0000000000000..250645202af5a --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -0,0 +1,375 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: saddsat_s7 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: saddsat_s7 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s7) = 
G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: saddsat_s7 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s7) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: saddsat_s7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s7) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s7) = G_TRUNC %0 + %3:_(s7) = G_TRUNC %1 + %4:_(s7) = G_SADDSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: saddsat_s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: saddsat_s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s8) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s8) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: saddsat_s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s8) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: saddsat_s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s8) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s8) = G_TRUNC %0 + %3:_(s8) = G_TRUNC %1 + %4:_(s8) = G_SADDSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: saddsat_v2s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: saddsat_v2s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(<2 x s8>) = G_SADDSAT [[BITCAST]], [[BITCAST1]] + ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s8>) + ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: saddsat_v2s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(<2 x s8>) = G_SADDSAT [[BITCAST]], [[BITCAST1]] + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: saddsat_v2s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s8>) = G_SADDSAT 
[[BITCAST]], [[BITCAST1]] + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s8>) + ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s8>) = G_BITCAST %2 + %5:_(<2 x s8>) = G_BITCAST %3 + %6:_(<2 x s8>) = G_SADDSAT %4, %5 + %7:_(s16) = G_BITCAST %6 + %8:_(s32) = G_ANYEXT %7 + $vgpr0 = COPY %8 +... + +--- +name: saddsat_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: saddsat_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: saddsat_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: saddsat_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + 
%3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_SADDSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: saddsat_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: saddsat_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0 = COPY [[SADDSAT]](<2 x s16>) + ; GFX8-LABEL: name: saddsat_v2s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[SADDSAT]](<2 x s16>) + ; GFX9-LABEL: name: saddsat_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[SADDSAT]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_SADDSAT %0, %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: saddsat_v3s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; GFX6-LABEL: name: saddsat_v3s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(<3 x s16>) = G_SADDSAT [[UV]], [[UV1]] + ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SADDSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-LABEL: name: saddsat_v3s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(<3 x s16>) = G_SADDSAT [[UV]], [[UV1]] + ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SADDSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-LABEL: name: saddsat_v3s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(<3 x s16>) = G_SADDSAT [[UV]], [[UV1]] + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SADDSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 + %3:_(<3 x s16>) = G_SADDSAT %1, %2 
+ %4:_(<3 x s16>) = G_IMPLICIT_DEF + %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 + $vgpr0_vgpr1_vgpr2 = COPY %5 +... + +--- +name: saddsat_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: saddsat_v4s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6: [[SADDSAT:%[0-9]+]]:_(<4 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[SADDSAT]](<4 x s16>) + ; GFX8-LABEL: name: saddsat_v4s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8: [[SADDSAT:%[0-9]+]]:_(<4 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[SADDSAT]](<4 x s16>) + ; GFX9-LABEL: name: saddsat_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9: [[SADDSAT:%[0-9]+]]:_(<4 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SADDSAT]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = G_SADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: saddsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: saddsat_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0 = COPY [[SADDSAT]](s32) + ; GFX8-LABEL: name: saddsat_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[SADDSAT]](s32) + ; GFX9-LABEL: name: saddsat_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[SADDSAT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_SADDSAT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: saddsat_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: saddsat_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[SADDSAT]](<2 x s32>) + ; GFX8-LABEL: name: saddsat_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[SADDSAT]](<2 x s32>) + ; GFX9-LABEL: name: saddsat_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SADDSAT]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = G_SADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: saddsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: saddsat_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[SADDSAT]](s64) + ; GFX8-LABEL: name: saddsat_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[SADDSAT]](s64) + ; GFX9-LABEL: name: saddsat_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SADDSAT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_SADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: saddsat_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; GFX6-LABEL: name: saddsat_v2s64 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: [[SADDSAT:%[0-9]+]]:_(<2 x s64>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SADDSAT]](<2 x s64>) + ; GFX8-LABEL: name: saddsat_v2s64 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8: [[SADDSAT:%[0-9]+]]:_(<2 x s64>) = G_SADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SADDSAT]](<2 x s64>) + ; GFX9-LABEL: name: saddsat_v2s64 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s64>) = G_SADDSAT [[COPY]], [[COPY1]] + ; 
GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SADDSAT]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x s64>) = G_SADDSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir new file mode 100644 index 0000000000000..53c9ba8410c72 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -0,0 +1,375 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: ssubsat_s7 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ssubsat_s7 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s7) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: ssubsat_s7 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s7) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: ssubsat_s7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s7) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s7) = G_TRUNC %0 + %3:_(s7) = G_TRUNC %1 + %4:_(s7) = G_SSUBSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: ssubsat_s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ssubsat_s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s8) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s8) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: ssubsat_s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s8) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: ssubsat_s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s8) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s8) = G_TRUNC %0 + %3:_(s8) = G_TRUNC %1 + %4:_(s8) = G_SSUBSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: ssubsat_v2s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ssubsat_v2s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(<2 x s8>) = G_SSUBSAT [[BITCAST]], [[BITCAST1]] + ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s8>) + ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: ssubsat_v2s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(<2 x s8>) = G_SSUBSAT [[BITCAST]], [[BITCAST1]] + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: ssubsat_v2s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s8>) = G_SSUBSAT 
[[BITCAST]], [[BITCAST1]] + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s8>) + ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s8>) = G_BITCAST %2 + %5:_(<2 x s8>) = G_BITCAST %3 + %6:_(<2 x s8>) = G_SSUBSAT %4, %5 + %7:_(s16) = G_BITCAST %6 + %8:_(s32) = G_ANYEXT %7 + $vgpr0 = COPY %8 +... + +--- +name: ssubsat_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ssubsat_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: ssubsat_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: ssubsat_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + 
%3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_SSUBSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: ssubsat_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ssubsat_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0 = COPY [[SSUBSAT]](<2 x s16>) + ; GFX8-LABEL: name: ssubsat_v2s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[SSUBSAT]](<2 x s16>) + ; GFX9-LABEL: name: ssubsat_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[SSUBSAT]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_SSUBSAT %0, %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: ssubsat_v3s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; GFX6-LABEL: name: ssubsat_v3s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(<3 x s16>) = G_SSUBSAT [[UV]], [[UV1]] + ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SSUBSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-LABEL: name: ssubsat_v3s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(<3 x s16>) = G_SSUBSAT [[UV]], [[UV1]] + ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SSUBSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-LABEL: name: ssubsat_v3s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<3 x s16>) = G_SSUBSAT [[UV]], [[UV1]] + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SSUBSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 + %3:_(<3 x s16>) = G_SSUBSAT %1, %2 
+ %4:_(<3 x s16>) = G_IMPLICIT_DEF + %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 + $vgpr0_vgpr1_vgpr2 = COPY %5 +... + +--- +name: ssubsat_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: ssubsat_v4s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(<4 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[SSUBSAT]](<4 x s16>) + ; GFX8-LABEL: name: ssubsat_v4s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(<4 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[SSUBSAT]](<4 x s16>) + ; GFX9-LABEL: name: ssubsat_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<4 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SSUBSAT]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: ssubsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: ssubsat_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0 = COPY [[SSUBSAT]](s32) + ; GFX8-LABEL: name: ssubsat_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[SSUBSAT]](s32) + ; GFX9-LABEL: name: ssubsat_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[SSUBSAT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_SSUBSAT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: ssubsat_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: ssubsat_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[SSUBSAT]](<2 x s32>) + ; GFX8-LABEL: name: ssubsat_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[SSUBSAT]](<2 x s32>) + ; GFX9-LABEL: name: ssubsat_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SSUBSAT]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: ssubsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: ssubsat_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[SSUBSAT]](s64) + ; GFX8-LABEL: name: ssubsat_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[SSUBSAT]](s64) + ; GFX9-LABEL: name: ssubsat_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[SSUBSAT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: ssubsat_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; GFX6-LABEL: name: ssubsat_v2s64 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(<2 x s64>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SSUBSAT]](<2 x s64>) + ; GFX8-LABEL: name: ssubsat_v2s64 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(<2 x s64>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SSUBSAT]](<2 x s64>) + ; GFX9-LABEL: name: ssubsat_v2s64 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s64>) = G_SSUBSAT [[COPY]], [[COPY1]] + ; 
GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SSUBSAT]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x s64>) = G_SSUBSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir new file mode 100644 index 0000000000000..79d671214b549 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -0,0 +1,375 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: uaddsat_s7 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: uaddsat_s7 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s7) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: uaddsat_s7 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s7) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: uaddsat_s7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s7) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s7) = G_TRUNC %0 + %3:_(s7) = G_TRUNC %1 + %4:_(s7) = G_UADDSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: uaddsat_s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: uaddsat_s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s8) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s8) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: uaddsat_s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s8) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: uaddsat_s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s8) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s8) = G_TRUNC %0 + %3:_(s8) = G_TRUNC %1 + %4:_(s8) = G_UADDSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: uaddsat_v2s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: uaddsat_v2s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(<2 x s8>) = G_UADDSAT [[BITCAST]], [[BITCAST1]] + ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s8>) + ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: uaddsat_v2s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(<2 x s8>) = G_UADDSAT [[BITCAST]], [[BITCAST1]] + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: uaddsat_v2s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s8>) = G_UADDSAT 
[[BITCAST]], [[BITCAST1]] + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s8>) + ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s8>) = G_BITCAST %2 + %5:_(<2 x s8>) = G_BITCAST %3 + %6:_(<2 x s8>) = G_UADDSAT %4, %5 + %7:_(s16) = G_BITCAST %6 + %8:_(s32) = G_ANYEXT %7 + $vgpr0 = COPY %8 +... + +--- +name: uaddsat_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: uaddsat_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: uaddsat_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: uaddsat_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + 
%3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_UADDSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: uaddsat_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: uaddsat_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0 = COPY [[UADDSAT]](<2 x s16>) + ; GFX8-LABEL: name: uaddsat_v2s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[UADDSAT]](<2 x s16>) + ; GFX9-LABEL: name: uaddsat_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[UADDSAT]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_UADDSAT %0, %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: uaddsat_v3s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; GFX6-LABEL: name: uaddsat_v3s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(<3 x s16>) = G_UADDSAT [[UV]], [[UV1]] + ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UADDSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-LABEL: name: uaddsat_v3s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(<3 x s16>) = G_UADDSAT [[UV]], [[UV1]] + ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UADDSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-LABEL: name: uaddsat_v3s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(<3 x s16>) = G_UADDSAT [[UV]], [[UV1]] + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UADDSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 + %3:_(<3 x s16>) = G_UADDSAT %1, %2 
+ %4:_(<3 x s16>) = G_IMPLICIT_DEF + %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 + $vgpr0_vgpr1_vgpr2 = COPY %5 +... + +--- +name: uaddsat_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: uaddsat_v4s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UADDSAT:%[0-9]+]]:_(<4 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[UADDSAT]](<4 x s16>) + ; GFX8-LABEL: name: uaddsat_v4s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8: [[UADDSAT:%[0-9]+]]:_(<4 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[UADDSAT]](<4 x s16>) + ; GFX9-LABEL: name: uaddsat_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UADDSAT:%[0-9]+]]:_(<4 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[UADDSAT]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = G_UADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: uaddsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: uaddsat_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0 = COPY [[UADDSAT]](s32) + ; GFX8-LABEL: name: uaddsat_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[UADDSAT]](s32) + ; GFX9-LABEL: name: uaddsat_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[UADDSAT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_UADDSAT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: uaddsat_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: uaddsat_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[UADDSAT]](<2 x s32>) + ; GFX8-LABEL: name: uaddsat_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[UADDSAT]](<2 x s32>) + ; GFX9-LABEL: name: uaddsat_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[UADDSAT]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = G_UADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: uaddsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: uaddsat_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[UADDSAT]](s64) + ; GFX8-LABEL: name: uaddsat_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[UADDSAT]](s64) + ; GFX9-LABEL: name: uaddsat_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[UADDSAT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_UADDSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: uaddsat_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; GFX6-LABEL: name: uaddsat_v2s64 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: [[UADDSAT:%[0-9]+]]:_(<2 x s64>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UADDSAT]](<2 x s64>) + ; GFX8-LABEL: name: uaddsat_v2s64 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8: [[UADDSAT:%[0-9]+]]:_(<2 x s64>) = G_UADDSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UADDSAT]](<2 x s64>) + ; GFX9-LABEL: name: uaddsat_v2s64 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s64>) = G_UADDSAT [[COPY]], [[COPY1]] + ; 
GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UADDSAT]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x s64>) = G_UADDSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir new file mode 100644 index 0000000000000..caf44cde8de06 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -0,0 +1,375 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=tahiti -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX6 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=fiji -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX8 %s +# RUN: llc -global-isel-abort=0 -march=amdgcn -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s + +--- +name: usubsat_s7 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: usubsat_s7 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s7) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: usubsat_s7 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s7) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: usubsat_s7 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY 
$vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s7) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s7) = G_TRUNC %0 + %3:_(s7) = G_TRUNC %1 + %4:_(s7) = G_USUBSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: usubsat_s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: usubsat_s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s8) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s8) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: usubsat_s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s8) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: usubsat_s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s8) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s8) = G_TRUNC %0 + %3:_(s8) = G_TRUNC %1 + %4:_(s8) = G_USUBSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... 
+ +--- +name: usubsat_v2s8 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: usubsat_v2s8 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(<2 x s8>) = G_USUBSAT [[BITCAST]], [[BITCAST1]] + ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s8>) + ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: usubsat_v2s8 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(<2 x s8>) = G_USUBSAT [[BITCAST]], [[BITCAST1]] + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: usubsat_v2s8 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) + ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s8>) = G_USUBSAT 
[[BITCAST]], [[BITCAST1]] + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s8>) + ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + %3:_(s16) = G_TRUNC %1 + %4:_(<2 x s8>) = G_BITCAST %2 + %5:_(<2 x s8>) = G_BITCAST %3 + %6:_(<2 x s8>) = G_USUBSAT %4, %5 + %7:_(s16) = G_BITCAST %6 + %8:_(s32) = G_ANYEXT %7 + $vgpr0 = COPY %8 +... + +--- +name: usubsat_s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: usubsat_s16 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8-LABEL: name: usubsat_s16 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9-LABEL: name: usubsat_s16 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) + ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[TRUNC]], [[TRUNC1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s16) = G_TRUNC %0 + 
%3:_(s16) = G_TRUNC %1 + %4:_(s16) = G_USUBSAT %2, %3 + %5:_(s32) = G_ANYEXT %4 + $vgpr0 = COPY %5 +... + +--- +name: usubsat_v2s16 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: usubsat_v2s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX6: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0 = COPY [[USUBSAT]](<2 x s16>) + ; GFX8-LABEL: name: usubsat_v2s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX8: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[USUBSAT]](<2 x s16>) + ; GFX9-LABEL: name: usubsat_v2s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 + ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[USUBSAT]](<2 x s16>) + %0:_(<2 x s16>) = COPY $vgpr0 + %1:_(<2 x s16>) = COPY $vgpr1 + %2:_(<2 x s16>) = G_USUBSAT %0, %1 + $vgpr0 = COPY %2 +... 
+ +--- +name: usubsat_v3s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2 + + ; GFX6-LABEL: name: usubsat_v3s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(<3 x s16>) = G_USUBSAT [[UV]], [[UV1]] + ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[USUBSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX8-LABEL: name: usubsat_v3s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(<3 x s16>) = G_USUBSAT [[UV]], [[UV1]] + ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[USUBSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + ; GFX9-LABEL: name: usubsat_v3s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(<3 x s16>) = G_USUBSAT [[UV]], [[UV1]] + ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF + ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[USUBSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) + %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 + %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 + %3:_(<3 x s16>) = G_USUBSAT %1, %2 
+ %4:_(<3 x s16>) = G_IMPLICIT_DEF + %5:_(<6 x s16>) = G_CONCAT_VECTORS %3, %4 + $vgpr0_vgpr1_vgpr2 = COPY %5 +... + +--- +name: usubsat_v4s16 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: usubsat_v4s16 + ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX6: [[USUBSAT:%[0-9]+]]:_(<4 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[USUBSAT]](<4 x s16>) + ; GFX8-LABEL: name: usubsat_v4s16 + ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX8: [[USUBSAT:%[0-9]+]]:_(<4 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[USUBSAT]](<4 x s16>) + ; GFX9-LABEL: name: usubsat_v4s16 + ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 + ; GFX9: [[USUBSAT:%[0-9]+]]:_(<4 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[USUBSAT]](<4 x s16>) + %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 + %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 + %2:_(<4 x s16>) = G_USUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: usubsat_s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1 + + ; GFX6-LABEL: name: usubsat_s32 + ; GFX6: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0 = COPY [[USUBSAT]](s32) + ; GFX8-LABEL: name: usubsat_s32 + ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0 = COPY [[USUBSAT]](s32) + ; GFX9-LABEL: name: usubsat_s32 + ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[USUBSAT]](s32) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = G_USUBSAT %0, %1 + $vgpr0 = COPY %2 +... + +--- +name: usubsat_v2s32 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: usubsat_v2s32 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX6: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[USUBSAT]](<2 x s32>) + ; GFX8-LABEL: name: usubsat_v2s32 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX8: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[USUBSAT]](<2 x s32>) + ; GFX9-LABEL: name: usubsat_v2s32 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 + ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[USUBSAT]](<2 x s32>) + %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 + %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 + %2:_(<2 x s32>) = G_USUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... 
+ +--- +name: usubsat_s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 + + ; GFX6-LABEL: name: usubsat_s64 + ; GFX6: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX6: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1 = COPY [[USUBSAT]](s64) + ; GFX8-LABEL: name: usubsat_s64 + ; GFX8: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX8: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1 = COPY [[USUBSAT]](s64) + ; GFX9-LABEL: name: usubsat_s64 + ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[USUBSAT]](s64) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = G_USUBSAT %0, %1 + $vgpr0_vgpr1 = COPY %2 +... + +--- +name: usubsat_v2s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4_vgpr5_vgpr6_vgpr7 + + ; GFX6-LABEL: name: usubsat_v2s64 + ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX6: [[USUBSAT:%[0-9]+]]:_(<2 x s64>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[USUBSAT]](<2 x s64>) + ; GFX8-LABEL: name: usubsat_v2s64 + ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX8: [[USUBSAT:%[0-9]+]]:_(<2 x s64>) = G_USUBSAT [[COPY]], [[COPY1]] + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[USUBSAT]](<2 x s64>) + ; GFX9-LABEL: name: usubsat_v2s64 + ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s64>) = G_USUBSAT [[COPY]], [[COPY1]] + ; 
GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[USUBSAT]](<2 x s64>) + %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 + %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 + %2:_(<2 x s64>) = G_USUBSAT %0, %1 + $vgpr0_vgpr1_vgpr2_vgpr3 = COPY %2 +... From 6a8c11a11f6cf8d4ef1fa373c9546c33c69f8cb0 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 12 Jul 2020 13:58:53 -0400 Subject: [PATCH 135/771] GlobalISel: Implement widenScalar for saturating add/sub Add a placeholder legality rule for AMDGPU until the rest of the actions are handled. --- .../llvm/CodeGen/GlobalISel/LegalizerHelper.h | 2 + .../CodeGen/GlobalISel/LegalizerHelper.cpp | 43 ++++++++++ .../lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp | 6 ++ .../AMDGPU/GlobalISel/legalize-saddsat.mir | 84 +++++++++++++++---- .../AMDGPU/GlobalISel/legalize-ssubsat.mir | 84 +++++++++++++++---- .../AMDGPU/GlobalISel/legalize-uaddsat.mir | 84 +++++++++++++++---- .../AMDGPU/GlobalISel/legalize-usubsat.mir | 84 +++++++++++++++---- 7 files changed, 315 insertions(+), 72 deletions(-) diff --git a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h index 0fe1d60b630db..058aacf38634d 100644 --- a/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h +++ b/llvm/include/llvm/CodeGen/GlobalISel/LegalizerHelper.h @@ -163,6 +163,8 @@ class LegalizerHelper { widenScalarExtract(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); LegalizeResult widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, LLT WideTy); + LegalizeResult widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy); /// Helper function to split a wide generic register into bitwise blocks with /// the given Type (which implies the number of blocks needed). 
The generic diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 38590656d1f4b..7cd52df5b84d1 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -1638,6 +1638,44 @@ LegalizerHelper::widenScalarInsert(MachineInstr &MI, unsigned TypeIdx, return Legalized; } +LegalizerHelper::LegalizeResult +LegalizerHelper::widenScalarAddSubSat(MachineInstr &MI, unsigned TypeIdx, + LLT WideTy) { + bool IsSigned = MI.getOpcode() == TargetOpcode::G_SADDSAT || + MI.getOpcode() == TargetOpcode::G_SSUBSAT; + // We can convert this to: + // 1. Any extend iN to iM + // 2. SHL by M-N + // 3. [US][ADD|SUB]SAT + // 4. L/ASHR by M-N + // + // It may be more efficient to lower this to a min and a max operation in + // the higher precision arithmetic if the promoted operation isn't legal, + // but this decision is up to the target's lowering request. + Register DstReg = MI.getOperand(0).getReg(); + + unsigned NewBits = WideTy.getScalarSizeInBits(); + unsigned SHLAmount = NewBits - MRI.getType(DstReg).getScalarSizeInBits(); + + auto LHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(1)); + auto RHS = MIRBuilder.buildAnyExt(WideTy, MI.getOperand(2)); + auto ShiftK = MIRBuilder.buildConstant(WideTy, SHLAmount); + auto ShiftL = MIRBuilder.buildShl(WideTy, LHS, ShiftK); + auto ShiftR = MIRBuilder.buildShl(WideTy, RHS, ShiftK); + + auto WideInst = MIRBuilder.buildInstr(MI.getOpcode(), {WideTy}, + {ShiftL, ShiftR}, MI.getFlags()); + + // Use a shift that will preserve the number of sign bits when the trunc is + // folded away. + auto Result = IsSigned ? 
MIRBuilder.buildAShr(WideTy, WideInst, ShiftK) + : MIRBuilder.buildLShr(WideTy, WideInst, ShiftK); + + MIRBuilder.buildTrunc(DstReg, Result); + MI.eraseFromParent(); + return Legalized; +} + LegalizerHelper::LegalizeResult LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { switch (MI.getOpcode()) { @@ -1674,6 +1712,11 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { MI.eraseFromParent(); return Legalized; } + case TargetOpcode::G_SADDSAT: + case TargetOpcode::G_SSUBSAT: + case TargetOpcode::G_UADDSAT: + case TargetOpcode::G_USUBSAT: + return widenScalarAddSubSat(MI, TypeIdx, WideTy); case TargetOpcode::G_CTTZ: case TargetOpcode::G_CTTZ_ZERO_UNDEF: case TargetOpcode::G_CTLZ: diff --git a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index 0802f2a2d08a7..2976794b49c3b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -1427,6 +1427,12 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, SextInReg.lowerFor({{S32}, {S64}}); } + // FIXME: Placeholder rule. Really depends on whether the clamp modifier is + // available, and is selectively legal for s16, s32, v2s16. 
+ getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT, G_UADDSAT, G_USUBSAT}) + .scalarize(0) + .clampScalar(0, S16, S32); + SextInReg .scalarize(0) .clampScalar(0, S32, S64) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir index 250645202af5a..50b639297b1db 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -14,25 +14,49 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[SADDSAT:%[0-9]+]]:_(s7) = G_SADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s7) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SADDSAT]](s16) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s7) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX8-LABEL: name: saddsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SADDSAT:%[0-9]+]]:_(s7) = G_SADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s7) - ; GFX8: $vgpr0 = 
COPY [[ANYEXT]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[ASHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: saddsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(s7) = G_SADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s7) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[ASHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -53,25 +77,49 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; GFX6: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[SADDSAT:%[0-9]+]]:_(s8) = G_SADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) 
= G_ANYEXT [[SADDSAT]](s8) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SADDSAT]](s16) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s8) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX8-LABEL: name: saddsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SADDSAT:%[0-9]+]]:_(s8) = G_SADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s8) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: saddsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: 
[[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(s8) = G_SADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SADDSAT]](s8) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir index 53c9ba8410c72..3a4cbb1336f3a 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -14,25 +14,49 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s7) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s7) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX6: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SSUBSAT]](s16) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s7) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX8-LABEL: name: ssubsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s7) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s7) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[ASHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: ssubsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s7) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s7) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; 
GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[ASHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -53,25 +77,49 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; GFX6: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s8) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s8) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SSUBSAT]](s16) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s8) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX8-LABEL: name: ssubsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s8) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT 
[[SSUBSAT]](s8) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: ssubsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s8) = G_SSUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[SSUBSAT]](s8) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir index 79d671214b549..ac229cd56dae6 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -14,25 +14,49 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[UADDSAT:%[0-9]+]]:_(s7) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s7) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s7) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX8-LABEL: name: uaddsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s7) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s7) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8: 
[[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[LSHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: uaddsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(s7) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s7) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[LSHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -53,25 +77,49 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; GFX6: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[UADDSAT:%[0-9]+]]:_(s8) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s8) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: 
[[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT]](s16) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s8) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX8-LABEL: name: uaddsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(s8) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s8) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: uaddsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(s8) = G_UADDSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[UADDSAT]](s8) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT 
[[TRUNC1]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir index caf44cde8de06..f0ea03d3d71f5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -14,25 +14,49 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) ; GFX6: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[USUBSAT:%[0-9]+]]:_(s7) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s7) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s7) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s7) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) 
; GFX8-LABEL: name: usubsat_s7 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) ; GFX8: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s7) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s7) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[LSHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: usubsat_s7 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s7) = G_TRUNC [[COPY]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s7) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(s7) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s7) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s7) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s7) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 9 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s7) = G_TRUNC [[LSHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s7) + ; GFX9: $vgpr0 
= COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s7) = G_TRUNC %0 @@ -53,25 +77,49 @@ body: | ; GFX6: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX6: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; GFX6: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX6: [[USUBSAT:%[0-9]+]]:_(s8) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s8) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT]](s16) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC3]](s8) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX8-LABEL: name: usubsat_s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX8: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; GFX8: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(s8) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s8) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT 
[[SHL]], [[SHL1]] + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: usubsat_s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[COPY1]](s32) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(s8) = G_USUBSAT [[TRUNC]], [[TRUNC1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[USUBSAT]](s8) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[TRUNC1]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC2]](s8) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s8) = G_TRUNC %0 From 23ec773d197cc19c3f52e9d9dc7d3b57a59fb496 Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 12 Jul 2020 16:11:53 -0400 Subject: [PATCH 136/771] GlobalISel: Implement fewerElementsVector for saturating add/sub --- .../CodeGen/GlobalISel/LegalizerHelper.cpp | 4 + .../AMDGPU/GlobalISel/legalize-saddsat.mir | 195 ++++++++++++++---- .../AMDGPU/GlobalISel/legalize-ssubsat.mir | 195 ++++++++++++++---- .../AMDGPU/GlobalISel/legalize-uaddsat.mir | 195 ++++++++++++++---- .../AMDGPU/GlobalISel/legalize-usubsat.mir | 195 ++++++++++++++---- 5 files changed, 604 insertions(+), 180 deletions(-) diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp 
b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 7cd52df5b84d1..da519f99ad7e8 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3457,6 +3457,10 @@ LegalizerHelper::fewerElementsVector(MachineInstr &MI, unsigned TypeIdx, case G_FSHL: case G_FSHR: case G_FREEZE: + case G_SADDSAT: + case G_SSUBSAT: + case G_UADDSAT: + case G_USUBSAT: return reduceOperationWidth(MI, TypeIdx, NarrowTy); case G_SHL: case G_LSHR: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir index 50b639297b1db..8b3fbdaa73eba 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-saddsat.mir @@ -142,11 +142,25 @@ body: | ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) - ; GFX6: [[SADDSAT:%[0-9]+]]:_(<2 x s8>) = G_SADDSAT [[BITCAST]], [[BITCAST1]] - ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s8>) - ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX6: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s8) = G_SADDSAT [[UV]], [[UV2]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX6: 
[[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SADDSAT1]](s16) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SADDSAT]](s8), [[TRUNC3]](s8) + ; GFX6: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX8-LABEL: name: saddsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -154,11 +168,22 @@ body: | ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) - ; GFX8: [[SADDSAT:%[0-9]+]]:_(<2 x s8>) = G_SADDSAT [[BITCAST]], [[BITCAST1]] - ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s8>) - ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX8: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s8) = G_SADDSAT [[UV]], [[UV2]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX8: 
[[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT1]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SADDSAT]](s8), [[TRUNC2]](s8) + ; GFX8: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: saddsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -166,11 +191,22 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s8>) = G_SADDSAT [[BITCAST]], [[BITCAST1]] - ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SADDSAT]](<2 x s8>) - ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX9: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s8) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[SHL]], [[SHL1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SADDSAT1]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = 
G_BUILD_VECTOR [[SADDSAT]](s8), [[TRUNC2]](s8) + ; GFX9: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -231,18 +267,30 @@ body: | ; GFX6-LABEL: name: saddsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0 = COPY [[SADDSAT]](<2 x s16>) + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV]], [[UV2]] + ; GFX6: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX8-LABEL: name: saddsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[SADDSAT]](<2 x s16>) + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV]], [[UV2]] + ; GFX8: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16) + ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: saddsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: 
[[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SADDSAT]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SADDSAT %0, %1 @@ -258,26 +306,41 @@ body: | ; GFX6-LABEL: name: saddsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[SADDSAT:%[0-9]+]]:_(<3 x s16>) = G_SADDSAT [[UV]], [[UV1]] + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX6: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV2]], [[UV5]] + ; GFX6: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV3]], [[UV6]] + ; GFX6: [[SADDSAT2:%[0-9]+]]:_(s16) = G_SADDSAT [[UV4]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16), [[SADDSAT2]](s16) ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SADDSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY 
[[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: saddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[SADDSAT:%[0-9]+]]:_(<3 x s16>) = G_SADDSAT [[UV]], [[UV1]] + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX8: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV2]], [[UV5]] + ; GFX8: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV3]], [[UV6]] + ; GFX8: [[SADDSAT2:%[0-9]+]]:_(s16) = G_SADDSAT [[UV4]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16), [[SADDSAT2]](s16) ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SADDSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: saddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<3 x s16>) = G_SADDSAT [[UV]], [[UV1]] + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX9: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV2]], [[UV5]] + ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV3]], [[UV6]] + ; GFX9: [[SADDSAT2:%[0-9]+]]:_(s16) = G_SADDSAT [[UV4]], 
[[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16), [[SADDSAT2]](s16) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SADDSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -296,18 +359,36 @@ body: | ; GFX6-LABEL: name: saddsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[SADDSAT:%[0-9]+]]:_(<4 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SADDSAT]](<4 x s16>) + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV]], [[UV4]] + ; GFX6: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV1]], [[UV5]] + ; GFX6: [[SADDSAT2:%[0-9]+]]:_(s16) = G_SADDSAT [[UV2]], [[UV6]] + ; GFX6: [[SADDSAT3:%[0-9]+]]:_(s16) = G_SADDSAT [[UV3]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16), [[SADDSAT2]](s16), [[SADDSAT3]](s16) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX8-LABEL: name: saddsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[SADDSAT:%[0-9]+]]:_(<4 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SADDSAT]](<4 x s16>) + ; 
GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV]], [[UV4]] + ; GFX8: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV1]], [[UV5]] + ; GFX8: [[SADDSAT2:%[0-9]+]]:_(s16) = G_SADDSAT [[UV2]], [[UV6]] + ; GFX8: [[SADDSAT3:%[0-9]+]]:_(s16) = G_SADDSAT [[UV3]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16), [[SADDSAT2]](s16), [[SADDSAT3]](s16) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: saddsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<4 x s16>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SADDSAT]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s16) = G_SADDSAT [[UV]], [[UV4]] + ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s16) = G_SADDSAT [[UV1]], [[UV5]] + ; GFX9: [[SADDSAT2:%[0-9]+]]:_(s16) = G_SADDSAT [[UV2]], [[UV6]] + ; GFX9: [[SADDSAT3:%[0-9]+]]:_(s16) = G_SADDSAT [[UV3]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SADDSAT]](s16), [[SADDSAT1]](s16), [[SADDSAT2]](s16), [[SADDSAT3]](s16) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_SADDSAT %0, %1 @@ -350,18 +431,30 @@ body: | ; GFX6-LABEL: name: saddsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY 
$vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SADDSAT]](<2 x s32>) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[UV]], [[UV2]] + ; GFX6: [[SADDSAT1:%[0-9]+]]:_(s32) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDSAT]](s32), [[SADDSAT1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: saddsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SADDSAT]](<2 x s32>) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[UV]], [[UV2]] + ; GFX8: [[SADDSAT1:%[0-9]+]]:_(s32) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDSAT]](s32), [[SADDSAT1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: saddsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s32>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SADDSAT]](<2 x s32>) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s32) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9: 
[[SADDSAT1:%[0-9]+]]:_(s32) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SADDSAT]](s32), [[SADDSAT1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SADDSAT %0, %1 @@ -404,18 +497,30 @@ body: | ; GFX6-LABEL: name: saddsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[SADDSAT:%[0-9]+]]:_(<2 x s64>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SADDSAT]](<2 x s64>) + ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[UV]], [[UV2]] + ; GFX6: [[SADDSAT1:%[0-9]+]]:_(s64) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SADDSAT]](s64), [[SADDSAT1]](s64) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: saddsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[SADDSAT:%[0-9]+]]:_(<2 x s64>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SADDSAT]](<2 x s64>) + ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[UV]], [[UV2]] + ; GFX8: [[SADDSAT1:%[0-9]+]]:_(s64) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SADDSAT]](s64), [[SADDSAT1]](s64) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: 
saddsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[SADDSAT:%[0-9]+]]:_(<2 x s64>) = G_SADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SADDSAT]](<2 x s64>) + ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9: [[SADDSAT:%[0-9]+]]:_(s64) = G_SADDSAT [[UV]], [[UV2]] + ; GFX9: [[SADDSAT1:%[0-9]+]]:_(s64) = G_SADDSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SADDSAT]](s64), [[SADDSAT1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_SADDSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir index 3a4cbb1336f3a..31f119c13e5e1 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ssubsat.mir @@ -142,11 +142,25 @@ body: | ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) - ; GFX6: [[SSUBSAT:%[0-9]+]]:_(<2 x s8>) = G_SSUBSAT [[BITCAST]], [[BITCAST1]] - ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s8>) - ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX6: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX6: 
[[SSUBSAT:%[0-9]+]]:_(s8) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[SSUBSAT1]](s16) + ; GFX6: [[ASHR:%[0-9]+]]:_(s32) = G_ASHR [[SEXT]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[ASHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SSUBSAT]](s8), [[TRUNC3]](s8) + ; GFX6: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX8-LABEL: name: ssubsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -154,11 +168,22 @@ body: | ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) - ; GFX8: [[SSUBSAT:%[0-9]+]]:_(<2 x s8>) = G_SSUBSAT [[BITCAST]], [[BITCAST1]] - ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s8>) - ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX8: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX8: 
[[SSUBSAT:%[0-9]+]]:_(s8) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX8: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT1]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SSUBSAT]](s8), [[TRUNC2]](s8) + ; GFX8: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: ssubsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -166,11 +191,22 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s8>) = G_SSUBSAT [[BITCAST]], [[BITCAST1]] - ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[SSUBSAT]](<2 x s8>) - ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX9: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s8) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; 
GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[SHL]], [[SHL1]] + ; GFX9: [[ASHR:%[0-9]+]]:_(s16) = G_ASHR [[SSUBSAT1]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[ASHR]](s16) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[SSUBSAT]](s8), [[TRUNC2]](s8) + ; GFX9: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -231,18 +267,30 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0 = COPY [[SSUBSAT]](<2 x s16>) + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX6: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX8-LABEL: name: ssubsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[SSUBSAT]](<2 x s16>) + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = 
G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX8: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16) + ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: ssubsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[SSUBSAT]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_SSUBSAT %0, %1 @@ -258,26 +306,41 @@ body: | ; GFX6-LABEL: name: ssubsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[SSUBSAT:%[0-9]+]]:_(<3 x s16>) = G_SSUBSAT [[UV]], [[UV1]] + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX6: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV2]], [[UV5]] + ; GFX6: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV3]], [[UV6]] + ; GFX6: [[SSUBSAT2:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV4]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16), 
[[SSUBSAT2]](s16) ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SSUBSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: ssubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[SSUBSAT:%[0-9]+]]:_(<3 x s16>) = G_SSUBSAT [[UV]], [[UV1]] + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX8: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV2]], [[UV5]] + ; GFX8: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV3]], [[UV6]] + ; GFX8: [[SSUBSAT2:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV4]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16), [[SSUBSAT2]](s16) ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SSUBSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: ssubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<3 x s16>) = G_SSUBSAT [[UV]], [[UV1]] + ; GFX9: 
[[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX9: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV2]], [[UV5]] + ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV3]], [[UV6]] + ; GFX9: [[SSUBSAT2:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV4]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16), [[SSUBSAT2]](s16) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[SSUBSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -296,18 +359,36 @@ body: | ; GFX6-LABEL: name: ssubsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[SSUBSAT:%[0-9]+]]:_(<4 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SSUBSAT]](<4 x s16>) + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV]], [[UV4]] + ; GFX6: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV1]], [[UV5]] + ; GFX6: [[SSUBSAT2:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV2]], [[UV6]] + ; GFX6: [[SSUBSAT3:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV3]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = 
G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16), [[SSUBSAT2]](s16), [[SSUBSAT3]](s16) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX8-LABEL: name: ssubsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[SSUBSAT:%[0-9]+]]:_(<4 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SSUBSAT]](<4 x s16>) + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV]], [[UV4]] + ; GFX8: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV1]], [[UV5]] + ; GFX8: [[SSUBSAT2:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV2]], [[UV6]] + ; GFX8: [[SSUBSAT3:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV3]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16), [[SSUBSAT2]](s16), [[SSUBSAT3]](s16) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: ssubsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<4 x s16>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SSUBSAT]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV]], [[UV4]] + ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV1]], [[UV5]] + ; GFX9: [[SSUBSAT2:%[0-9]+]]:_(s16) = G_SSUBSAT [[UV2]], [[UV6]] + ; GFX9: [[SSUBSAT3:%[0-9]+]]:_(s16) = G_SSUBSAT 
[[UV3]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[SSUBSAT]](s16), [[SSUBSAT1]](s16), [[SSUBSAT2]](s16), [[SSUBSAT3]](s16) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_SSUBSAT %0, %1 @@ -350,18 +431,30 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1 = COPY [[SSUBSAT]](<2 x s32>) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX6: [[SSUBSAT1:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBSAT]](s32), [[SSUBSAT1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: ssubsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1 = COPY [[SSUBSAT]](<2 x s32>) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX8: [[SSUBSAT1:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBSAT]](s32), [[SSUBSAT1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: ssubsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x 
s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s32>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[SSUBSAT]](<2 x s32>) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s32) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[SSUBSAT]](s32), [[SSUBSAT1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_SSUBSAT %0, %1 @@ -404,18 +497,30 @@ body: | ; GFX6-LABEL: name: ssubsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[SSUBSAT:%[0-9]+]]:_(<2 x s64>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SSUBSAT]](<2 x s64>) + ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX6: [[SSUBSAT1:%[0-9]+]]:_(s64) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SSUBSAT]](s64), [[SSUBSAT1]](s64) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: ssubsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[SSUBSAT:%[0-9]+]]:_(<2 x s64>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SSUBSAT]](<2 x s64>) + ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; 
GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX8: [[SSUBSAT1:%[0-9]+]]:_(s64) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SSUBSAT]](s64), [[SSUBSAT1]](s64) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: ssubsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[SSUBSAT:%[0-9]+]]:_(<2 x s64>) = G_SSUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[SSUBSAT]](<2 x s64>) + ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9: [[SSUBSAT:%[0-9]+]]:_(s64) = G_SSUBSAT [[UV]], [[UV2]] + ; GFX9: [[SSUBSAT1:%[0-9]+]]:_(s64) = G_SSUBSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[SSUBSAT]](s64), [[SSUBSAT1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_SSUBSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir index ac229cd56dae6..e080bde81b3a5 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-uaddsat.mir @@ -142,11 +142,25 @@ body: | ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) - ; GFX6: [[UADDSAT:%[0-9]+]]:_(<2 x s8>) = G_UADDSAT [[BITCAST]], [[BITCAST1]] - ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = 
G_UNMERGE_VALUES [[UADDSAT]](<2 x s8>) - ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX6: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s8) = G_UADDSAT [[UV]], [[UV2]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[UADDSAT1]](s16) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[UADDSAT]](s8), [[TRUNC3]](s8) + ; GFX6: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX8-LABEL: name: uaddsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -154,11 +168,22 @@ body: | ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(<2 x s8>) = G_UADDSAT [[BITCAST]], [[BITCAST1]] - ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = 
G_UNMERGE_VALUES [[UADDSAT]](<2 x s8>) - ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX8: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s8) = G_UADDSAT [[UV]], [[UV2]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT1]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[UADDSAT]](s8), [[TRUNC2]](s8) + ; GFX8: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: uaddsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -166,11 +191,22 @@ body: | ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s8>) = G_UADDSAT [[BITCAST]], [[BITCAST1]] - ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[UADDSAT]](<2 x s8>) - ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) - ; 
GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX9: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s8) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[SHL]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[UADDSAT1]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[UADDSAT]](s8), [[TRUNC2]](s8) + ; GFX9: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -231,18 +267,30 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0 = COPY [[UADDSAT]](<2 x s16>) + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV]], [[UV2]] + ; GFX6: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; 
GFX8-LABEL: name: uaddsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[UADDSAT]](<2 x s16>) + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV]], [[UV2]] + ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16) + ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: uaddsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[UADDSAT]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_UADDSAT %0, %1 @@ -258,26 +306,41 @@ body: | ; GFX6-LABEL: name: uaddsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[UADDSAT:%[0-9]+]]:_(<3 x s16>) = G_UADDSAT [[UV]], [[UV1]] + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; 
GFX6: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV2]], [[UV5]] + ; GFX6: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV3]], [[UV6]] + ; GFX6: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[UV4]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16), [[UADDSAT2]](s16) ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UADDSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: uaddsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[UADDSAT:%[0-9]+]]:_(<3 x s16>) = G_UADDSAT [[UV]], [[UV1]] + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX8: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV2]], [[UV5]] + ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV3]], [[UV6]] + ; GFX8: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[UV4]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16), [[UADDSAT2]](s16) ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UADDSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = 
G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: uaddsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<3 x s16>) = G_UADDSAT [[UV]], [[UV1]] + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX9: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV2]], [[UV5]] + ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV3]], [[UV6]] + ; GFX9: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[UV4]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16), [[UADDSAT2]](s16) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[UADDSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -296,18 +359,36 @@ body: | ; GFX6-LABEL: name: uaddsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UADDSAT:%[0-9]+]]:_(<4 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1 = COPY [[UADDSAT]](<4 x s16>) + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6: [[UV4:%[0-9]+]]:_(s16), 
[[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV]], [[UV4]] + ; GFX6: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV1]], [[UV5]] + ; GFX6: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[UV2]], [[UV6]] + ; GFX6: [[UADDSAT3:%[0-9]+]]:_(s16) = G_UADDSAT [[UV3]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16), [[UADDSAT2]](s16), [[UADDSAT3]](s16) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX8-LABEL: name: uaddsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UADDSAT:%[0-9]+]]:_(<4 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1 = COPY [[UADDSAT]](<4 x s16>) + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV]], [[UV4]] + ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV1]], [[UV5]] + ; GFX8: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[UV2]], [[UV6]] + ; GFX8: [[UADDSAT3:%[0-9]+]]:_(s16) = G_UADDSAT [[UV3]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16), [[UADDSAT2]](s16), [[UADDSAT3]](s16) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: uaddsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<4 x s16>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[UADDSAT]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = 
G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s16) = G_UADDSAT [[UV]], [[UV4]] + ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s16) = G_UADDSAT [[UV1]], [[UV5]] + ; GFX9: [[UADDSAT2:%[0-9]+]]:_(s16) = G_UADDSAT [[UV2]], [[UV6]] + ; GFX9: [[UADDSAT3:%[0-9]+]]:_(s16) = G_UADDSAT [[UV3]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[UADDSAT]](s16), [[UADDSAT1]](s16), [[UADDSAT2]](s16), [[UADDSAT3]](s16) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_UADDSAT %0, %1 @@ -350,18 +431,30 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1 = COPY [[UADDSAT]](<2 x s32>) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] + ; GFX6: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: uaddsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1 = COPY [[UADDSAT]](<2 x s32>) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES 
[[COPY1]](<2 x s32>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] + ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: uaddsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s32>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[UADDSAT]](<2 x s32>) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s32) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s32) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[UADDSAT]](s32), [[UADDSAT1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_UADDSAT %0, %1 @@ -404,18 +497,30 @@ body: | ; GFX6-LABEL: name: uaddsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[UADDSAT:%[0-9]+]]:_(<2 x s64>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UADDSAT]](<2 x s64>) + ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[UV]], [[UV2]] + ; GFX6: [[UADDSAT1:%[0-9]+]]:_(s64) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UADDSAT]](s64), [[UADDSAT1]](s64) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY 
[[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: uaddsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[UADDSAT:%[0-9]+]]:_(<2 x s64>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UADDSAT]](<2 x s64>) + ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[UV]], [[UV2]] + ; GFX8: [[UADDSAT1:%[0-9]+]]:_(s64) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UADDSAT]](s64), [[UADDSAT1]](s64) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: uaddsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[UADDSAT:%[0-9]+]]:_(<2 x s64>) = G_UADDSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[UADDSAT]](<2 x s64>) + ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9: [[UADDSAT:%[0-9]+]]:_(s64) = G_UADDSAT [[UV]], [[UV2]] + ; GFX9: [[UADDSAT1:%[0-9]+]]:_(s64) = G_UADDSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[UADDSAT]](s64), [[UADDSAT1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_UADDSAT %0, %1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir index f0ea03d3d71f5..56a1f1baded55 100644 --- 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-usubsat.mir @@ -142,11 +142,25 @@ body: | ; GFX6: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX6: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) ; GFX6: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) - ; GFX6: [[USUBSAT:%[0-9]+]]:_(<2 x s8>) = G_USUBSAT [[BITCAST]], [[BITCAST1]] - ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s8>) - ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX6: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) - ; GFX6: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX6: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX6: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s8) = G_USUBSAT [[UV]], [[UV2]] + ; GFX6: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX6: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX6: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX6: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX6: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX6: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX6: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[C]](s16) + ; GFX6: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[USUBSAT1]](s16) + ; GFX6: [[LSHR:%[0-9]+]]:_(s32) = G_LSHR [[ZEXT1]], [[ZEXT]](s32) + ; GFX6: [[TRUNC2:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR]](s32) + ; GFX6: [[TRUNC3:%[0-9]+]]:_(s8) = G_TRUNC [[TRUNC2]](s16) + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[USUBSAT]](s8), [[TRUNC3]](s8) + ; GFX6: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX6: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX6: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX6: $vgpr0 = COPY [[ANYEXT2]](s32) ; 
GFX8-LABEL: name: usubsat_v2s8 ; GFX8: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -154,11 +168,22 @@ body: | ; GFX8: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX8: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) ; GFX8: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(<2 x s8>) = G_USUBSAT [[BITCAST]], [[BITCAST1]] - ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s8>) - ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX8: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) - ; GFX8: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX8: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX8: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s8) = G_USUBSAT [[UV]], [[UV2]] + ; GFX8: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX8: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX8: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX8: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX8: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX8: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT1]], [[C]](s16) + ; GFX8: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[USUBSAT]](s8), [[TRUNC2]](s8) + ; GFX8: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX8: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX8: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX8: $vgpr0 = COPY [[ANYEXT2]](s32) ; GFX9-LABEL: name: usubsat_v2s8 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 @@ -166,11 +191,22 @@ body: | ; GFX9: 
[[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; GFX9: [[BITCAST:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC]](s16) ; GFX9: [[BITCAST1:%[0-9]+]]:_(<2 x s8>) = G_BITCAST [[TRUNC1]](s16) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s8>) = G_USUBSAT [[BITCAST]], [[BITCAST1]] - ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[USUBSAT]](<2 x s8>) - ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV]](s8), [[UV1]](s8) - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) - ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) + ; GFX9: [[UV:%[0-9]+]]:_(s8), [[UV1:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST]](<2 x s8>) + ; GFX9: [[UV2:%[0-9]+]]:_(s8), [[UV3:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BITCAST1]](<2 x s8>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s8) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s16) = G_ANYEXT [[UV1]](s8) + ; GFX9: [[ANYEXT1:%[0-9]+]]:_(s16) = G_ANYEXT [[UV3]](s8) + ; GFX9: [[C:%[0-9]+]]:_(s16) = G_CONSTANT i16 8 + ; GFX9: [[SHL:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT]], [[C]](s16) + ; GFX9: [[SHL1:%[0-9]+]]:_(s16) = G_SHL [[ANYEXT1]], [[C]](s16) + ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[SHL]], [[SHL1]] + ; GFX9: [[LSHR:%[0-9]+]]:_(s16) = G_LSHR [[USUBSAT1]], [[C]](s16) + ; GFX9: [[TRUNC2:%[0-9]+]]:_(s8) = G_TRUNC [[LSHR]](s16) + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s8>) = G_BUILD_VECTOR [[USUBSAT]](s8), [[TRUNC2]](s8) + ; GFX9: [[UV4:%[0-9]+]]:_(s8), [[UV5:%[0-9]+]]:_(s8) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<2 x s8>) + ; GFX9: [[MV:%[0-9]+]]:_(s16) = G_MERGE_VALUES [[UV4]](s8), [[UV5]](s8) + ; GFX9: [[ANYEXT2:%[0-9]+]]:_(s32) = G_ANYEXT [[MV]](s16) + ; GFX9: $vgpr0 = COPY [[ANYEXT2]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 @@ -231,18 +267,30 @@ body: | ; GFX6-LABEL: name: usubsat_v2s16 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX6: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0 = COPY 
[[USUBSAT]](<2 x s16>) + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV]], [[UV2]] + ; GFX6: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16) + ; GFX6: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX8-LABEL: name: usubsat_v2s16 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX8: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0 = COPY [[USUBSAT]](<2 x s16>) + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV]], [[UV2]] + ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16) + ; GFX8: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) ; GFX9-LABEL: name: usubsat_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[USUBSAT]](<2 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<2 x s16>) + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<2 x s16>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16) + ; GFX9: $vgpr0 = COPY [[BUILD_VECTOR]](<2 x s16>) %0:_(<2 x s16>) = COPY 
$vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 %2:_(<2 x s16>) = G_USUBSAT %0, %1 @@ -258,26 +306,41 @@ body: | ; GFX6-LABEL: name: usubsat_v3s16 ; GFX6: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX6: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX6: [[USUBSAT:%[0-9]+]]:_(<3 x s16>) = G_USUBSAT [[UV]], [[UV1]] + ; GFX6: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX6: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV2]], [[UV5]] + ; GFX6: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV3]], [[UV6]] + ; GFX6: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[UV4]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16), [[USUBSAT2]](s16) ; GFX6: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX6: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[USUBSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX6: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) ; GFX6: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX8-LABEL: name: usubsat_v3s16 ; GFX8: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX8: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX8: [[USUBSAT:%[0-9]+]]:_(<3 x s16>) = G_USUBSAT [[UV]], [[UV1]] + ; GFX8: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX8: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV2]], [[UV5]] + ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV3]], 
[[UV6]] + ; GFX8: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[UV4]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16), [[USUBSAT2]](s16) ; GFX8: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX8: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[USUBSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX8: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) ; GFX8: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) ; GFX9-LABEL: name: usubsat_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 ; GFX9: [[UV:%[0-9]+]]:_(<3 x s16>), [[UV1:%[0-9]+]]:_(<3 x s16>) = G_UNMERGE_VALUES [[COPY]](<6 x s16>) - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<3 x s16>) = G_USUBSAT [[UV]], [[UV1]] + ; GFX9: [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16), [[UV4:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV]](<3 x s16>) + ; GFX9: [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[UV1]](<3 x s16>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV2]], [[UV5]] + ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV3]], [[UV6]] + ; GFX9: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[UV4]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<3 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16), [[USUBSAT2]](s16) ; GFX9: [[DEF:%[0-9]+]]:_(<4 x s16>) = G_IMPLICIT_DEF ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[DEF]](<4 x s16>), 0 - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[USUBSAT]](<3 x s16>), [[EXTRACT]](<3 x s16>) + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<6 x s16>) = G_CONCAT_VECTORS [[BUILD_VECTOR]](<3 x s16>), [[EXTRACT]](<3 x s16>) ; GFX9: $vgpr0_vgpr1_vgpr2 = COPY [[CONCAT_VECTORS]](<6 x s16>) %0:_(<6 x s16>) = COPY $vgpr0_vgpr1_vgpr2 %1:_(<3 x s16>), %2:_(<3 x s16>) = G_UNMERGE_VALUES %0 @@ -296,18 
+359,36 @@ body: | ; GFX6-LABEL: name: usubsat_v4s16 ; GFX6: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX6: [[USUBSAT:%[0-9]+]]:_(<4 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1 = COPY [[USUBSAT]](<4 x s16>) + ; GFX6: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX6: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV]], [[UV4]] + ; GFX6: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV1]], [[UV5]] + ; GFX6: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[UV2]], [[UV6]] + ; GFX6: [[USUBSAT3:%[0-9]+]]:_(s16) = G_USUBSAT [[UV3]], [[UV7]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16), [[USUBSAT2]](s16), [[USUBSAT3]](s16) + ; GFX6: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX8-LABEL: name: usubsat_v4s16 ; GFX8: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX8: [[USUBSAT:%[0-9]+]]:_(<4 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1 = COPY [[USUBSAT]](<4 x s16>) + ; GFX8: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX8: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV]], [[UV4]] + ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV1]], [[UV5]] + ; GFX8: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[UV2]], [[UV6]] + ; GFX8: [[USUBSAT3:%[0-9]+]]:_(s16) = G_USUBSAT [[UV3]], [[UV7]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16), 
[[USUBSAT2]](s16), [[USUBSAT3]](s16) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) ; GFX9-LABEL: name: usubsat_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<4 x s16>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[USUBSAT]](<4 x s16>) + ; GFX9: [[UV:%[0-9]+]]:_(s16), [[UV1:%[0-9]+]]:_(s16), [[UV2:%[0-9]+]]:_(s16), [[UV3:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) + ; GFX9: [[UV4:%[0-9]+]]:_(s16), [[UV5:%[0-9]+]]:_(s16), [[UV6:%[0-9]+]]:_(s16), [[UV7:%[0-9]+]]:_(s16) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s16) = G_USUBSAT [[UV]], [[UV4]] + ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s16) = G_USUBSAT [[UV1]], [[UV5]] + ; GFX9: [[USUBSAT2:%[0-9]+]]:_(s16) = G_USUBSAT [[UV2]], [[UV6]] + ; GFX9: [[USUBSAT3:%[0-9]+]]:_(s16) = G_USUBSAT [[UV3]], [[UV7]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<4 x s16>) = G_BUILD_VECTOR [[USUBSAT]](s16), [[USUBSAT1]](s16), [[USUBSAT2]](s16), [[USUBSAT3]](s16) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<4 x s16>) = G_USUBSAT %0, %1 @@ -350,18 +431,30 @@ body: | ; GFX6-LABEL: name: usubsat_v2s32 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX6: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1 = COPY [[USUBSAT]](<2 x s32>) + ; GFX6: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX6: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] + ; GFX6: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) + ; GFX6: $vgpr0_vgpr1 = COPY 
[[BUILD_VECTOR]](<2 x s32>) ; GFX8-LABEL: name: usubsat_v2s32 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX8: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1 = COPY [[USUBSAT]](<2 x s32>) + ; GFX8: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX8: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] + ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) + ; GFX8: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) ; GFX9-LABEL: name: usubsat_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s32>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[USUBSAT]](<2 x s32>) + ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) + ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s32) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s32) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[USUBSAT]](s32), [[USUBSAT1]](s32) + ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 %2:_(<2 x s32>) = G_USUBSAT %0, %1 @@ -404,18 +497,30 @@ body: | ; GFX6-LABEL: name: usubsat_v2s64 ; GFX6: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX6: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX6: [[USUBSAT:%[0-9]+]]:_(<2 x s64>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[USUBSAT]](<2 x 
s64>) + ; GFX6: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX6: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX6: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[UV]], [[UV2]] + ; GFX6: [[USUBSAT1:%[0-9]+]]:_(s64) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX6: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[USUBSAT]](s64), [[USUBSAT1]](s64) + ; GFX6: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX8-LABEL: name: usubsat_v2s64 ; GFX8: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX8: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX8: [[USUBSAT:%[0-9]+]]:_(<2 x s64>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[USUBSAT]](<2 x s64>) + ; GFX8: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX8: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX8: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[UV]], [[UV2]] + ; GFX8: [[USUBSAT1:%[0-9]+]]:_(s64) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX8: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = G_BUILD_VECTOR [[USUBSAT]](s64), [[USUBSAT1]](s64) + ; GFX8: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) ; GFX9-LABEL: name: usubsat_v2s64 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 - ; GFX9: [[USUBSAT:%[0-9]+]]:_(<2 x s64>) = G_USUBSAT [[COPY]], [[COPY1]] - ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[USUBSAT]](<2 x s64>) + ; GFX9: [[UV:%[0-9]+]]:_(s64), [[UV1:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY]](<2 x s64>) + ; GFX9: [[UV2:%[0-9]+]]:_(s64), [[UV3:%[0-9]+]]:_(s64) = G_UNMERGE_VALUES [[COPY1]](<2 x s64>) + ; GFX9: [[USUBSAT:%[0-9]+]]:_(s64) = G_USUBSAT [[UV]], [[UV2]] + ; GFX9: [[USUBSAT1:%[0-9]+]]:_(s64) = G_USUBSAT [[UV1]], [[UV3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s64>) = 
G_BUILD_VECTOR [[USUBSAT]](s64), [[USUBSAT1]](s64) + ; GFX9: $vgpr0_vgpr1_vgpr2_vgpr3 = COPY [[BUILD_VECTOR]](<2 x s64>) %0:_(<2 x s64>) = COPY $vgpr0_vgpr1_vgpr2_vgpr3 %1:_(<2 x s64>) = COPY $vgpr4_vgpr5_vgpr6_vgpr7 %2:_(<2 x s64>) = G_USUBSAT %0, %1 From 83aaa2085e5f44a287a3ac90532197a077c0160e Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sun, 12 Jul 2020 14:03:23 -0400 Subject: [PATCH 137/771] GlobalISel: Define equivalent nodes for saturating add/sub --- llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td index b8f03bcec16b8..150834e65b2dc 100644 --- a/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td +++ b/llvm/include/llvm/Target/GlobalISel/SelectionDAGCompat.td @@ -67,6 +67,10 @@ def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; +def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; def : GINodeEquiv; From 51b20152e66cc6f092675d36b8367320bda31955 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 13 Jul 2020 11:21:40 -0700 Subject: [PATCH 138/771] [JITLink] Apply MSVCPError workaround to a1fc26030a42. Hopefully this will get the Windows bots building again. 
--- .../llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h index 3bb56f4d96158..0c8514a60a507 100644 --- a/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h +++ b/llvm/include/llvm/ExecutionEngine/JITLink/JITLinkMemoryManager.h @@ -17,6 +17,7 @@ #include "llvm/ExecutionEngine/JITSymbol.h" #include "llvm/Support/Error.h" #include "llvm/Support/Memory.h" +#include "llvm/Support/MSVCErrorWorkarounds.h" #include #include @@ -78,7 +79,7 @@ class JITLinkMemoryManager { /// Calls finalizeAsync and waits for completion. Error finalize() { - std::promise FinalizeResultP; + std::promise FinalizeResultP; auto FinalizeResultF = FinalizeResultP.get_future(); finalizeAsync( [&](Error Err) { FinalizeResultP.set_value(std::move(Err)); }); From 14f738b350147c18241ead3d62f4367d65ff38ad Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 13 Jul 2020 20:48:12 +0200 Subject: [PATCH 139/771] [NewGVN] Rename xfail tests (NFC) Add an -xfail.ll suffix to tests marked XFAIL, so these files can be split into passing and failing parts. 
--- ...007-07-31-NoDomInherit.ll => 2007-07-31-NoDomInherit-xfail.ll} | 0 .../{2008-02-12-UndefLoad.ll => 2008-02-12-UndefLoad-xfail.ll} | 0 ...edScatterGather.ll => 2016-08-30-MaskedScatterGather-xfail.ll} | 0 .../Transforms/NewGVN/{assume-equal.ll => assume-equal-xfail.ll} | 0 .../NewGVN/{calls-nonlocal.ll => calls-nonlocal-xfail.ll} | 0 llvm/test/Transforms/NewGVN/{cond_br2.ll => cond_br2-xfail.ll} | 0 llvm/test/Transforms/NewGVN/{debugloc.ll => debugloc-xfail.ll} | 0 llvm/test/Transforms/NewGVN/{fence.ll => fence-xfail.ll} | 0 .../NewGVN/{invariant.group.ll => invariant.group-xfail.ll} | 0 .../NewGVN/{nonescaping-malloc.ll => nonescaping-malloc-xfail.ll} | 0 .../Transforms/NewGVN/{opt-remarks.ll => opt-remarks-xfail.ll} | 0 llvm/test/Transforms/NewGVN/{pr10820.ll => pr10820-xfail.ll} | 0 llvm/test/Transforms/NewGVN/{pr14166.ll => pr14166-xfail.ll} | 0 .../Transforms/NewGVN/{pre-new-inst.ll => pre-new-inst-xfail.ll} | 0 .../NewGVN/{rle-must-alias.ll => rle-must-alias-xfail.ll} | 0 .../{rle-no-phi-translate.ll => rle-no-phi-translate-xfail.ll} | 0 16 files changed, 0 insertions(+), 0 deletions(-) rename llvm/test/Transforms/NewGVN/{2007-07-31-NoDomInherit.ll => 2007-07-31-NoDomInherit-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{2008-02-12-UndefLoad.ll => 2008-02-12-UndefLoad-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{2016-08-30-MaskedScatterGather.ll => 2016-08-30-MaskedScatterGather-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{assume-equal.ll => assume-equal-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{calls-nonlocal.ll => calls-nonlocal-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{cond_br2.ll => cond_br2-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{debugloc.ll => debugloc-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{fence.ll => fence-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{invariant.group.ll => invariant.group-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{nonescaping-malloc.ll => 
nonescaping-malloc-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{opt-remarks.ll => opt-remarks-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{pr10820.ll => pr10820-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{pr14166.ll => pr14166-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{pre-new-inst.ll => pre-new-inst-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{rle-must-alias.ll => rle-must-alias-xfail.ll} (100%) rename llvm/test/Transforms/NewGVN/{rle-no-phi-translate.ll => rle-no-phi-translate-xfail.ll} (100%) diff --git a/llvm/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll b/llvm/test/Transforms/NewGVN/2007-07-31-NoDomInherit-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/2007-07-31-NoDomInherit.ll rename to llvm/test/Transforms/NewGVN/2007-07-31-NoDomInherit-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll b/llvm/test/Transforms/NewGVN/2008-02-12-UndefLoad-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/2008-02-12-UndefLoad.ll rename to llvm/test/Transforms/NewGVN/2008-02-12-UndefLoad-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll b/llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather.ll rename to llvm/test/Transforms/NewGVN/2016-08-30-MaskedScatterGather-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/assume-equal.ll b/llvm/test/Transforms/NewGVN/assume-equal-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/assume-equal.ll rename to llvm/test/Transforms/NewGVN/assume-equal-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/calls-nonlocal.ll b/llvm/test/Transforms/NewGVN/calls-nonlocal-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/calls-nonlocal.ll rename to llvm/test/Transforms/NewGVN/calls-nonlocal-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/cond_br2.ll 
b/llvm/test/Transforms/NewGVN/cond_br2-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/cond_br2.ll rename to llvm/test/Transforms/NewGVN/cond_br2-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/debugloc.ll b/llvm/test/Transforms/NewGVN/debugloc-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/debugloc.ll rename to llvm/test/Transforms/NewGVN/debugloc-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/fence.ll b/llvm/test/Transforms/NewGVN/fence-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/fence.ll rename to llvm/test/Transforms/NewGVN/fence-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/invariant.group.ll b/llvm/test/Transforms/NewGVN/invariant.group-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/invariant.group.ll rename to llvm/test/Transforms/NewGVN/invariant.group-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/nonescaping-malloc.ll b/llvm/test/Transforms/NewGVN/nonescaping-malloc-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/nonescaping-malloc.ll rename to llvm/test/Transforms/NewGVN/nonescaping-malloc-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/opt-remarks.ll b/llvm/test/Transforms/NewGVN/opt-remarks-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/opt-remarks.ll rename to llvm/test/Transforms/NewGVN/opt-remarks-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/pr10820.ll b/llvm/test/Transforms/NewGVN/pr10820-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/pr10820.ll rename to llvm/test/Transforms/NewGVN/pr10820-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/pr14166.ll b/llvm/test/Transforms/NewGVN/pr14166-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/pr14166.ll rename to llvm/test/Transforms/NewGVN/pr14166-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/pre-new-inst.ll b/llvm/test/Transforms/NewGVN/pre-new-inst-xfail.ll similarity index 100% rename from 
llvm/test/Transforms/NewGVN/pre-new-inst.ll rename to llvm/test/Transforms/NewGVN/pre-new-inst-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/rle-must-alias.ll b/llvm/test/Transforms/NewGVN/rle-must-alias-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/rle-must-alias.ll rename to llvm/test/Transforms/NewGVN/rle-must-alias-xfail.ll diff --git a/llvm/test/Transforms/NewGVN/rle-no-phi-translate.ll b/llvm/test/Transforms/NewGVN/rle-no-phi-translate-xfail.ll similarity index 100% rename from llvm/test/Transforms/NewGVN/rle-no-phi-translate.ll rename to llvm/test/Transforms/NewGVN/rle-no-phi-translate-xfail.ll From 4b626dd94944d60751af62d65a2692698520fcc2 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Mon, 13 Jul 2020 21:05:47 +0200 Subject: [PATCH 140/771] [NewGVN] Separate passing assume tests (NFC) Result might not be exactly the same as under GVN, but all the desired transforms are made. --- .../Transforms/NewGVN/assume-equal-xfail.ll | 111 ------------- llvm/test/Transforms/NewGVN/assume-equal.ll | 151 ++++++++++++++++++ 2 files changed, 151 insertions(+), 111 deletions(-) create mode 100644 llvm/test/Transforms/NewGVN/assume-equal.ll diff --git a/llvm/test/Transforms/NewGVN/assume-equal-xfail.ll b/llvm/test/Transforms/NewGVN/assume-equal-xfail.ll index 7e009192064a7..7da17441d0787 100644 --- a/llvm/test/Transforms/NewGVN/assume-equal-xfail.ll +++ b/llvm/test/Transforms/NewGVN/assume-equal-xfail.ll @@ -154,117 +154,6 @@ entry: ret float %0 } -; CHECK-LABEL: define float @_Z1if(float %p) -define float @_Z1if(float %p) { -entry: - %p.addr = alloca float, align 4 - store float %p, float* %p.addr, align 4 - - %0 = load float, float* %p.addr, align 4 - %cmp = fcmp ueq float %0, 3.000000e+00 ; no nnan flag - can't propagate - call void @llvm.assume(i1 %cmp) - - ; CHECK-NOT: ret float 3.000000e+00 - ret float %0 -} - -; This test checks if constant propagation works for multiple node edges -; CHECK-LABEL: define i32 @_Z1ii(i32 %p) -define i32 
@_Z1ii(i32 %p) { -entry: - %cmp = icmp eq i32 %p, 42 - call void @llvm.assume(i1 %cmp) - - ; CHECK: br i1 true, label %bb2, label %bb2 - br i1 %cmp, label %bb2, label %bb2 -bb2: - call void @llvm.assume(i1 true) - ; CHECK: br i1 true, label %bb2, label %bb2 - br i1 %cmp, label %bb2, label %bb2 - - ; CHECK: ret i32 42 - ret i32 %p -} - -; CHECK-LABEL: define i32 @_Z1ij(i32 %p) -define i32 @_Z1ij(i32 %p) { -entry: - %cmp = icmp eq i32 %p, 42 - call void @llvm.assume(i1 %cmp) - - ; CHECK: br i1 true, label %bb2, label %bb2 - br i1 %cmp, label %bb2, label %bb2 -bb2: - ; CHECK-NOT: %cmp2 = - %cmp2 = icmp eq i32 %p, 42 - ; CHECK-NOT: call void @llvm.assume( - call void @llvm.assume(i1 %cmp2) - - ; CHECK: br i1 true, label %bb2, label %bb2 - br i1 %cmp, label %bb2, label %bb2 - - ; CHECK: ret i32 42 - ret i32 %p -} - -; CHECK-LABEL: define i32 @_Z1ik(i32 %p) -define i32 @_Z1ik(i32 %p) { -entry: - %cmp = icmp eq i32 %p, 42 - call void @llvm.assume(i1 %cmp) - - ; CHECK: br i1 true, label %bb2, label %bb3 - br i1 %cmp, label %bb2, label %bb3 -bb2: - ; CHECK-NOT: %cmp3 = - %cmp3 = icmp eq i32 %p, 43 - ; CHECK: store i8 undef, i8* null - call void @llvm.assume(i1 %cmp3) - ret i32 15 -bb3: - ret i32 17 -} - -; This test checks if GVN can do the constant propagation correctly -; when there are multiple uses of the same assume value in the -; basic block that has a loop back-edge pointing to itself. -; -; CHECK-LABEL: define i32 @_Z1il(i32 %val, i1 %k) -define i32 @_Z1il(i32 %val, i1 %k) { - br label %next - -next: -; CHECK: tail call void @llvm.assume(i1 %k) -; CHECK-NEXT: %cmp = icmp eq i32 %val, 50 - tail call void @llvm.assume(i1 %k) - tail call void @llvm.assume(i1 %k) - %cmp = icmp eq i32 %val, 50 - br i1 %cmp, label %next, label %meh - -meh: - ret i32 0 -} - -; This test checks if GVN can prevent the constant propagation correctly -; in the successor blocks that are not dominated by the basic block -; with the assume instruction. 
-; -; CHECK-LABEL: define i1 @_z1im(i32 %val, i1 %k, i1 %j) -define i1 @_z1im(i32 %val, i1 %k, i1 %j) { - br i1 %j, label %next, label %meh - -next: -; CHECK: tail call void @llvm.assume(i1 %k) -; CHECK-NEXT: br label %meh - tail call void @llvm.assume(i1 %k) - tail call void @llvm.assume(i1 %k) - br label %meh - -meh: -; CHECK: ret i1 %k - ret i1 %k -} - declare noalias i8* @_Znwm(i64) declare void @_ZN1AC1Ev(%struct.A*) declare void @llvm.assume(i1) diff --git a/llvm/test/Transforms/NewGVN/assume-equal.ll b/llvm/test/Transforms/NewGVN/assume-equal.ll new file mode 100644 index 0000000000000..a20075665882a --- /dev/null +++ b/llvm/test/Transforms/NewGVN/assume-equal.ll @@ -0,0 +1,151 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -newgvn -S | FileCheck %s + +define float @_Z1if(float %p) { +; CHECK-LABEL: @_Z1if( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[P_ADDR:%.*]] = alloca float, align 4 +; CHECK-NEXT: store float [[P:%.*]], float* [[P_ADDR]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq float [[P]], 3.000000e+00 +; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: ret float [[P]] +; +entry: + %p.addr = alloca float, align 4 + store float %p, float* %p.addr, align 4 + + %0 = load float, float* %p.addr, align 4 + %cmp = fcmp ueq float %0, 3.000000e+00 ; no nnan flag - can't propagate + call void @llvm.assume(i1 %cmp) + + ret float %0 +} + +; This test checks if constant propagation works for multiple node edges +define i32 @_Z1ii(i32 %p) { +; CHECK-LABEL: @_Z1ii( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[P:%.*]], 42 +; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: br i1 true, label [[BB2]], label [[BB2]] +; CHECK: 0: +; CHECK-NEXT: store i8 undef, i8* null, align 1 +; CHECK-NEXT: ret i32 [[P]] +; +entry: + %cmp = icmp eq i32 %p, 42 + call void @llvm.assume(i1 %cmp) + + br i1 %cmp, label %bb2, label 
%bb2 +bb2: + call void @llvm.assume(i1 true) + br i1 %cmp, label %bb2, label %bb2 + + ret i32 %p +} + +define i32 @_Z1ij(i32 %p) { +; CHECK-LABEL: @_Z1ij( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[P:%.*]], 42 +; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB2]] +; CHECK: bb2: +; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: br i1 true, label [[TMP0:%.*]], label [[BB2]] +; CHECK: 0: +; CHECK-NEXT: ret i32 42 +; +entry: + %cmp = icmp eq i32 %p, 42 + call void @llvm.assume(i1 %cmp) + + br i1 %cmp, label %bb2, label %bb2 +bb2: + %cmp2 = icmp eq i32 %p, 42 + call void @llvm.assume(i1 %cmp2) + + br i1 %cmp, label %0, label %bb2 + + ret i32 %p +} + +define i32 @_Z1ik(i32 %p) { +; CHECK-LABEL: @_Z1ik( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[P:%.*]], 42 +; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB3:%.*]] +; CHECK: bb2: +; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: ret i32 15 +; CHECK: bb3: +; CHECK-NEXT: store i8 undef, i8* null, align 1 +; CHECK-NEXT: ret i32 17 +; +entry: + %cmp = icmp eq i32 %p, 42 + call void @llvm.assume(i1 %cmp) + + br i1 %cmp, label %bb2, label %bb3 +bb2: + %cmp3 = icmp eq i32 %p, 43 + call void @llvm.assume(i1 %cmp3) + ret i32 15 +bb3: + ret i32 17 +} + +; This test checks if GVN can do the constant propagation correctly +; when there are multiple uses of the same assume value in the +; basic block that has a loop back-edge pointing to itself. 
+define i32 @_Z1il(i32 %val, i1 %k) { +; CHECK-LABEL: @_Z1il( +; CHECK-NEXT: br label [[NEXT:%.*]] +; CHECK: next: +; CHECK-NEXT: tail call void @llvm.assume(i1 [[K:%.*]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[K]]) +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[VAL:%.*]], 50 +; CHECK-NEXT: br i1 [[CMP]], label [[NEXT]], label [[MEH:%.*]] +; CHECK: meh: +; CHECK-NEXT: ret i32 0 +; + br label %next + +next: + tail call void @llvm.assume(i1 %k) + tail call void @llvm.assume(i1 %k) + %cmp = icmp eq i32 %val, 50 + br i1 %cmp, label %next, label %meh + +meh: + ret i32 0 +} + +; This test checks if GVN can prevent the constant propagation correctly +; in the successor blocks that are not dominated by the basic block +; with the assume instruction. +define i1 @_z1im(i32 %val, i1 %k, i1 %j) { +; CHECK-LABEL: @_z1im( +; CHECK-NEXT: br i1 [[J:%.*]], label [[NEXT:%.*]], label [[MEH:%.*]] +; CHECK: next: +; CHECK-NEXT: tail call void @llvm.assume(i1 [[K:%.*]]) +; CHECK-NEXT: tail call void @llvm.assume(i1 [[K]]) +; CHECK-NEXT: br label [[MEH]] +; CHECK: meh: +; CHECK-NEXT: ret i1 [[K]] +; + br i1 %j, label %next, label %meh + +next: + tail call void @llvm.assume(i1 %k) + tail call void @llvm.assume(i1 %k) + br label %meh + +meh: + ret i1 %k +} + +declare void @llvm.assume(i1) From 353fa4403a06c2d86d617362b42e20ee6b3f53be Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 11 Jul 2020 22:48:26 +0200 Subject: [PATCH 141/771] [PredicateInfo] Place predicate info after assume Place the ssa.copy instructions for assumes after the assume, instead of before it. Both options are valid, but placing them afterwards prevents assumes from being replaced with assume(true). This fixes https://bugs.llvm.org/show_bug.cgi?id=37541 in NewGVN and will avoid a similar issue in SCCP when we handle more predicate infos. 
Differential Revision: https://reviews.llvm.org/D83631 --- llvm/lib/Transforms/Utils/PredicateInfo.cpp | 10 +++++---- llvm/test/Transforms/NewGVN/assume-equal.ll | 10 ++++----- llvm/test/Transforms/NewGVN/assumes.ll | 4 ++-- .../Util/PredicateInfo/testandor.ll | 22 +++++++++---------- 4 files changed, 24 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp index c81efd77aa5ff..6ac2d64494e94 100644 --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -205,14 +205,14 @@ struct ValueDFS_Compare { // numbering will say the placed predicaeinfos should go first (IE // LN_beginning), so we won't be in this function. For assumes, we will end // up here, beause we need to order the def we will place relative to the - // assume. So for the purpose of ordering, we pretend the def is the assume - // because that is where we will insert the info. + // assume. So for the purpose of ordering, we pretend the def is right + // after the assume, because that is where we will insert the info. if (!VD.U) { assert(VD.PInfo && "No def, no use, and no predicateinfo should not occur"); assert(isa(VD.PInfo) && "Middle of block should only occur for assumes"); - return cast(VD.PInfo)->AssumeInst; + return cast(VD.PInfo)->AssumeInst->getNextNode(); } return nullptr; } @@ -621,7 +621,9 @@ Value *PredicateInfoBuilder::materializeStack(unsigned int &Counter, auto *PAssume = dyn_cast(ValInfo); assert(PAssume && "Should not have gotten here without it being an assume"); - IRBuilder<> B(PAssume->AssumeInst); + // Insert the predicate directly after the assume. While it also holds + // directly before it, assume(i1 true) is not a useful fact. 
+ IRBuilder<> B(PAssume->AssumeInst->getNextNode()); Function *IF = getCopyDeclaration(F.getParent(), Op->getType()); if (IF->users().empty()) PI.CreatedDeclarations.insert(IF); diff --git a/llvm/test/Transforms/NewGVN/assume-equal.ll b/llvm/test/Transforms/NewGVN/assume-equal.ll index a20075665882a..d67105fbf1a28 100644 --- a/llvm/test/Transforms/NewGVN/assume-equal.ll +++ b/llvm/test/Transforms/NewGVN/assume-equal.ll @@ -7,7 +7,7 @@ define float @_Z1if(float %p) { ; CHECK-NEXT: [[P_ADDR:%.*]] = alloca float, align 4 ; CHECK-NEXT: store float [[P:%.*]], float* [[P_ADDR]], align 4 ; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq float [[P]], 3.000000e+00 -; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: ret float [[P]] ; entry: @@ -26,7 +26,7 @@ define i32 @_Z1ii(i32 %p) { ; CHECK-LABEL: @_Z1ii( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[P:%.*]], 42 -; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB2]] ; CHECK: bb2: ; CHECK-NEXT: br i1 true, label [[BB2]], label [[BB2]] @@ -50,7 +50,7 @@ define i32 @_Z1ij(i32 %p) { ; CHECK-LABEL: @_Z1ij( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[P:%.*]], 42 -; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB2]] ; CHECK: bb2: ; CHECK-NEXT: call void @llvm.assume(i1 true) @@ -76,10 +76,10 @@ define i32 @_Z1ik(i32 %p) { ; CHECK-LABEL: @_Z1ik( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[P:%.*]], 42 -; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: br i1 true, label [[BB2:%.*]], label [[BB3:%.*]] ; CHECK: bb2: -; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: call void @llvm.assume(i1 false) ; CHECK-NEXT: ret i32 15 ; CHECK: bb3: ; CHECK-NEXT: store i8 undef, 
i8* null, align 1 diff --git a/llvm/test/Transforms/NewGVN/assumes.ll b/llvm/test/Transforms/NewGVN/assumes.ll index ea20b38bff6af..eee302a17ee46 100644 --- a/llvm/test/Transforms/NewGVN/assumes.ll +++ b/llvm/test/Transforms/NewGVN/assumes.ll @@ -4,7 +4,7 @@ define i32 @test1(i32 %arg) { ; CHECK-LABEL: @test1( ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[ARG:%.*]], 5 -; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: ret i32 [[ARG]] ; %cmp = icmp sge i32 %arg, 5 @@ -18,7 +18,7 @@ define i32 @test2(i32 %arg, i1 %b) { ; CHECK: bb: ; CHECK-NEXT: [[A:%.*]] = phi i32 [ 1, [[TMP0:%.*]] ], [ 2, [[BB]] ] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[ARG:%.*]], [[A]] -; CHECK-NEXT: call void @llvm.assume(i1 true) +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: br i1 [[B:%.*]], label [[BB]], label [[END:%.*]] ; CHECK: end: ; CHECK-NEXT: ret i32 [[ARG]] diff --git a/llvm/test/Transforms/Util/PredicateInfo/testandor.ll b/llvm/test/Transforms/Util/PredicateInfo/testandor.ll index c84562cc5ef53..cd1491e31a8da 100644 --- a/llvm/test/Transforms/Util/PredicateInfo/testandor.ll +++ b/llvm/test/Transforms/Util/PredicateInfo/testandor.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -print-predicateinfo < %s 2>&1 | FileCheck %s +; RUN: opt -print-predicateinfo -disable-output < %s 2>&1 | FileCheck %s declare void @foo(i1) declare void @bar(i32) @@ -136,18 +136,18 @@ define void @testandassume(i32 %x, i32 %y) { ; CHECK-NEXT: [[XZ:%.*]] = icmp eq i32 [[X:%.*]], 0 ; CHECK-NEXT: [[YZ:%.*]] = icmp eq i32 [[Y:%.*]], 0 ; CHECK-NEXT: [[Z:%.*]] = and i1 [[XZ]], [[YZ]] -; CHECK: [[TMP1:%.*]] = call i1 @llvm.ssa.copy.{{.+}}(i1 [[XZ]]) -; CHECK: [[TMP2:%.*]] = call i32 @llvm.ssa.copy.{{.+}}(i32 [[X]]) +; CHECK-NEXT: call void @llvm.assume(i1 [[Z]]) +; CHECK: [[TMP1:%.*]] = call i1 @llvm.ssa.copy.{{.+}}(i1 [[Z]]) +; CHECK: [[TMP2:%.*]] = call i32 @llvm.ssa.copy.{{.+}}(i32 
[[Y]]) ; CHECK: [[TMP3:%.*]] = call i1 @llvm.ssa.copy.{{.+}}(i1 [[YZ]]) -; CHECK: [[TMP4:%.*]] = call i32 @llvm.ssa.copy.{{.+}}(i32 [[Y]]) -; CHECK: [[TMP5:%.*]] = call i1 @llvm.ssa.copy.{{.+}}(i1 [[Z]]) -; CHECK-NEXT: call void @llvm.assume(i1 [[TMP5]]) -; CHECK: [[DOT0:%.*]] = call i1 @llvm.ssa.copy.{{.+}}(i1 [[TMP1]]) -; CHECK: [[DOT01:%.*]] = call i32 @llvm.ssa.copy.{{.+}}(i32 [[TMP2]]) +; CHECK: [[TMP4:%.*]] = call i32 @llvm.ssa.copy.{{.+}}(i32 [[X]]) +; CHECK: [[TMP5:%.*]] = call i1 @llvm.ssa.copy.{{.+}}(i1 [[XZ]]) +; CHECK: [[DOT0:%.*]] = call i1 @llvm.ssa.copy.{{.+}}(i1 [[TMP5]]) +; CHECK: [[DOT01:%.*]] = call i32 @llvm.ssa.copy.{{.+}}(i32 [[TMP4]]) ; CHECK: [[DOT02:%.*]] = call i1 @llvm.ssa.copy.{{.+}}(i1 [[TMP3]]) -; CHECK: [[DOT03:%.*]] = call i32 @llvm.ssa.copy.{{.+}}(i32 [[TMP4]]) -; CHECK: [[DOT04:%.*]] = call i1 @llvm.ssa.copy.{{.+}}(i1 [[TMP5]]) -; CHECK-NEXT: br i1 [[TMP5]], label [[BOTH:%.*]], label [[NOPE:%.*]] +; CHECK: [[DOT03:%.*]] = call i32 @llvm.ssa.copy.{{.+}}(i32 [[TMP2]]) +; CHECK: [[DOT04:%.*]] = call i1 @llvm.ssa.copy.{{.+}}(i1 [[TMP1]]) +; CHECK-NEXT: br i1 [[TMP1]], label [[BOTH:%.*]], label [[NOPE:%.*]] ; CHECK: both: ; CHECK-NEXT: call void @foo(i1 [[DOT0]]) ; CHECK-NEXT: call void @foo(i1 [[DOT02]]) From 3780d3eb1001fd25d4b4cf953ae621a0f3b30ee5 Mon Sep 17 00:00:00 2001 From: Tim Keith Date: Mon, 13 Jul 2020 12:19:17 -0700 Subject: [PATCH 142/771] [flang] Use octal escapes for character literals in modfiles Character literals can be formatted using octal or hex escapes for non-ascii characters. This is so that the program can be unparsed for either pgf90 or gfortran to compile. But modfiles should not be affected by that -- they should be consistent. This changes causes modfiles to always have character literals formatted with octal escapes. 
Differential Revision: https://reviews.llvm.org/D83703 --- flang/lib/Semantics/mod-file.cpp | 4 ++++ flang/lib/Semantics/mod-file.h | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/flang/lib/Semantics/mod-file.cpp b/flang/lib/Semantics/mod-file.cpp index 2f813d95f26b0..6fa59f0a82a08 100644 --- a/flang/lib/Semantics/mod-file.cpp +++ b/flang/lib/Semantics/mod-file.cpp @@ -8,6 +8,7 @@ #include "mod-file.h" #include "resolve-names.h" +#include "flang/Common/restorer.h" #include "flang/Evaluate/tools.h" #include "flang/Parser/message.h" #include "flang/Parser/parsing.h" @@ -99,6 +100,9 @@ class SubprogramSymbolCollector { }; bool ModFileWriter::WriteAll() { + // this flag affects character literals: force it to be consistent + auto restorer{ + common::ScopedSet(parser::useHexadecimalEscapeSequences, false)}; WriteAll(context_.globalScope()); return !context_.AnyFatalError(); } diff --git a/flang/lib/Semantics/mod-file.h b/flang/lib/Semantics/mod-file.h index 8823c5f1e4972..17ffe804c5be3 100644 --- a/flang/lib/Semantics/mod-file.h +++ b/flang/lib/Semantics/mod-file.h @@ -32,7 +32,7 @@ class SemanticsContext; class ModFileWriter { public: - ModFileWriter(SemanticsContext &context) : context_{context} {} + explicit ModFileWriter(SemanticsContext &context) : context_{context} {} bool WriteAll(); private: From 11046ef69e3e9ec3ae9f5f4caadf965b7f1e22c8 Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 13 Jul 2020 12:14:21 -0700 Subject: [PATCH 143/771] [llvm][NFC] Factored the default inlining advice This is in preparation for the 'development' mode advisor. We currently want to track what the default policy's decision would have been, this refactoring makes it easier to do that. 
--- llvm/lib/Analysis/InlineAdvisor.cpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index 9a3e5fa0df722..74a536d1ce2f4 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -84,7 +84,9 @@ class DefaultInlineAdvice : public InlineAdvice { } // namespace -std::unique_ptr DefaultInlineAdvisor::getAdvice(CallBase &CB) { +llvm::Optional +getDefaultInlineAdvice(CallBase &CB, FunctionAnalysisManager &FAM, + const InlineParams &Params) { Function &Caller = *CB.getCaller(); ProfileSummaryInfo *PSI = FAM.getResult(Caller) @@ -111,10 +113,16 @@ std::unique_ptr DefaultInlineAdvisor::getAdvice(CallBase &CB) { return getInlineCost(CB, Params, CalleeTTI, GetAssumptionCache, GetTLI, GetBFI, PSI, RemarksEnabled ? &ORE : nullptr); }; - auto OIC = llvm::shouldInline(CB, GetInlineCost, ORE, - Params.EnableDeferral.hasValue() && - Params.EnableDeferral.getValue()); - return std::make_unique(this, CB, OIC, ORE); + return llvm::shouldInline(CB, GetInlineCost, ORE, + Params.EnableDeferral.hasValue() && + Params.EnableDeferral.getValue()); +} + +std::unique_ptr DefaultInlineAdvisor::getAdvice(CallBase &CB) { + auto OIC = getDefaultInlineAdvice(CB, FAM, Params); + return std::make_unique( + this, CB, OIC, + FAM.getResult(*CB.getCaller())); } InlineAdvice::InlineAdvice(InlineAdvisor *Advisor, CallBase &CB, From 31b5b121299c08f360b61ecf873fe3eee99c0499 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 13 Jul 2020 12:37:18 -0700 Subject: [PATCH 144/771] [ORC] Remove a spurious reinterpret_cast. 
--- llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp index 5f89d91ef9948..18b3c5e12b1c2 100644 --- a/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp +++ b/llvm/lib/ExecutionEngine/Orc/OrcABISupport.cpp @@ -461,9 +461,7 @@ void OrcI386::writeTrampolines(char *TrampolineWorkingMem, assert((ResolverAddr >> 32) == 0 && "ResolverAddr out of range"); uint64_t CallRelImm = 0xF1C4C400000000e8; - uint64_t ResolverRel = - ResolverAddr - reinterpret_cast(TrampolineBlockTargetAddress) - - 5; + uint64_t ResolverRel = ResolverAddr - TrampolineBlockTargetAddress - 5; uint64_t *Trampolines = reinterpret_cast(TrampolineWorkingMem); for (unsigned I = 0; I < NumTrampolines; ++I, ResolverRel -= TrampolineSize) From 340c376b87c72e7eb3670301e4920106615b6689 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Martin=20Storsj=C3=B6?= Date: Mon, 13 Jul 2020 22:47:31 +0300 Subject: [PATCH 145/771] [lldb] Fix a CMake warning typo. NFC. 
--- lldb/tools/debugserver/source/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/tools/debugserver/source/CMakeLists.txt b/lldb/tools/debugserver/source/CMakeLists.txt index 9a7e2eb9a1a0f..b29b3ddc3056c 100644 --- a/lldb/tools/debugserver/source/CMakeLists.txt +++ b/lldb/tools/debugserver/source/CMakeLists.txt @@ -41,7 +41,7 @@ function(get_debugserver_codesign_identity result) return() endif() - message(WARNING "Development code sign identiy not found: 'lldb_codesign' ${not_found_help}") + message(WARNING "Development code sign identity not found: 'lldb_codesign' ${not_found_help}") # LLVM pendant: fallback if available if(LLVM_CODESIGNING_IDENTITY) From 341ec564182161861ec4415cdee1f4f3a0527e97 Mon Sep 17 00:00:00 2001 From: Adrian Prantl Date: Mon, 13 Jul 2020 13:09:53 -0700 Subject: [PATCH 146/771] Add a decorator to skip tests when running under Rosetta This allows skipping a test when running the testsuite on macOS under the Rosetta translation layer. 
Differential Revision: https://reviews.llvm.org/D83600 --- lldb/packages/Python/lldbsuite/test/decorators.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/lldb/packages/Python/lldbsuite/test/decorators.py b/lldb/packages/Python/lldbsuite/test/decorators.py index ec084184cd656..be282f6db32c6 100644 --- a/lldb/packages/Python/lldbsuite/test/decorators.py +++ b/lldb/packages/Python/lldbsuite/test/decorators.py @@ -552,6 +552,14 @@ def are_sb_headers_missing(): return skipTestIfFn(are_sb_headers_missing)(func) +def skipIfRosetta(func, bugnumber=None): + """Skip a test when running the testsuite on macOS under the Rosetta translation layer.""" + def is_running_rosetta(self): + if not lldbplatformutil.getPlatform() in ['darwin', 'macosx']: + return False + return platform.uname()[5] == "arm" and self.getArchitecture() == "x86_64" + return skipTestIfFn(is_running_rosetta, bugnumber)(func) + def skipIfiOSSimulator(func): """Decorate the item to skip tests that should be skipped on the iOS Simulator.""" def is_ios_simulator(): From 9908a3b9f521c954cbf6adcec35b14b2f6c8da49 Mon Sep 17 00:00:00 2001 From: Davide Italiano Date: Mon, 13 Jul 2020 12:59:16 -0700 Subject: [PATCH 147/771] Revert "[llvm] Native size estimator for training -Oz inliner" This reverts commit 83080a294ad7d145d758821bcf4354ad0cb7d299 as it breaks the macOS modules build. 
--- llvm/CMakeLists.txt | 12 - .../Analysis/InlineSizeEstimatorAnalysis.h | 35 - llvm/include/llvm/Analysis/Utils/TFUtils.h | 136 - llvm/lib/Analysis/CMakeLists.txt | 40 +- .../Analysis/InlineSizeEstimatorAnalysis.cpp | 299 - llvm/lib/Analysis/TFUtils.cpp | 143 - llvm/lib/Passes/PassBuilder.cpp | 1 - llvm/lib/Passes/PassRegistry.def | 1 - llvm/unittests/Analysis/CMakeLists.txt | 12 +- .../InlineSizeEstimatorAnalysisTest.cpp | 101 - .../ir2native_x86_64_model/saved_model.pbtxt | 10596 ---------------- .../variables/variables.data-00000-of-00001 | Bin 88424 -> 0 bytes .../variables/variables.index | Bin 398 -> 0 bytes llvm/unittests/Analysis/TFUtilsTest.cpp | 98 - 14 files changed, 10 insertions(+), 11464 deletions(-) delete mode 100644 llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h delete mode 100644 llvm/include/llvm/Analysis/Utils/TFUtils.h delete mode 100644 llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp delete mode 100644 llvm/lib/Analysis/TFUtils.cpp delete mode 100644 llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp delete mode 100644 llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt delete mode 100644 llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001 delete mode 100644 llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index delete mode 100644 llvm/unittests/Analysis/TFUtilsTest.cpp diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index 4e14e61fcacd6..de2887b64c2a9 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -981,18 +981,6 @@ if (NOT TENSORFLOW_AOT_PATH STREQUAL "") ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/tf_runtime) endif() -set(TENSORFLOW_C_LIB_PATH "" CACHE PATH "Path to TensorFlow C library install") -find_library(tensorflow_c_api tensorflow PATHS ${TENSORFLOW_C_LIB_PATH}/lib) - -# Similar to the above Tensorflow dependency, please refer to the same script. 
-# In this case, the latest C API library is available for download from -# https://www.tensorflow.org/install/lang_c -if (tensorflow_c_api) - set(LLVM_HAVE_TF_API "ON" CACHE BOOL "Full Tensorflow API available") - add_definitions("-DLLVM_HAVE_TF_API") - include_directories(${TENSORFLOW_C_LIB_PATH}/include) -endif() - # Put this before tblgen. Else we have a circular dependence. add_subdirectory(lib/Demangle) add_subdirectory(lib/Support) diff --git a/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h deleted file mode 100644 index 29a6f59146748..0000000000000 --- a/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h +++ /dev/null @@ -1,35 +0,0 @@ -//===- InlineSizeEstimatorAnalysis.h - ML size estimator --------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// - -#ifndef LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H -#define LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H - -#include "llvm/IR/PassManager.h" - -namespace llvm { -class Function; - -class TFModelEvaluator; -class InlineSizeEstimatorAnalysis - : public AnalysisInfoMixin { -public: - InlineSizeEstimatorAnalysis(); - InlineSizeEstimatorAnalysis(InlineSizeEstimatorAnalysis &&); - ~InlineSizeEstimatorAnalysis(); - - static AnalysisKey Key; - using Result = Optional; - Result run(const Function &F, FunctionAnalysisManager &FAM); - static bool isEvaluatorRequested(); - -private: - std::unique_ptr Evaluator; -}; -} // namespace llvm -#endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H \ No newline at end of file diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h deleted file mode 100644 index a1d7108b149ff..0000000000000 --- 
a/llvm/include/llvm/Analysis/Utils/TFUtils.h +++ /dev/null @@ -1,136 +0,0 @@ -//===- TFUtils.h - utilities for tensorflow C API ---------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -#ifndef LLVM_ANALYSIS_UTILS_TFUTILS_H -#define LLVM_ANALYSIS_UTILS_TFUTILS_H - -#include "tensorflow/c/c_api.h" -#include "llvm/IR/LLVMContext.h" - -#include -#include - -namespace llvm { - -/// Load a SavedModel, find the given inputs and outputs, and setup storage -/// for input tensors. The user is responsible for correctly dimensioning the -/// input tensors and setting their values before calling evaluate(). -/// To initialize: -/// - construct the object -/// - initialize the input tensors using initInput. Indices must correspond to -/// indices in the InputNames used at construction. -/// To use: -/// - set input values by using getInput to get each input tensor, and then -/// setting internal scalars, for all dimensions (tensors are row-major: -/// https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/c/c_api.h#L205) -/// - prepare an output vector of TF_Output* type, with the correct number of -/// outputs (i.e. same as OutputNames). Initialize the vector with nullptr -/// values. -/// - call evaluate. The input tensors' values are not consumed after this, and -/// may still be read. -/// - use the outputs in the output vector -/// - deallocate each output tensor in the output vector, using TF_DeleteTensor. -class TFModelEvaluator final { -public: - /// The result of a model evaluation. Handles the lifetime of the output - /// TF_Tensor objects, which means that their values need to be used before - /// the EvaluationResult's dtor is called. 
- class EvaluationResult { - public: - ~EvaluationResult() { - for (auto *P : Output) - if (P) - TF_DeleteTensor(P); - } - - EvaluationResult(const EvaluationResult &) = delete; - EvaluationResult(EvaluationResult &&Other) - : OutputSize(Other.OutputSize), Output(std::move(Other.Output)) { - Other.Output.clear(); - }; - - /// Get a pointer to the first element of the tensor at Index. - template T *getTensorValue(size_t Index) { - return static_cast(TF_TensorData(Output[Index])); - } - - private: - friend class TFModelEvaluator; - EvaluationResult(size_t OutputSize) - : OutputSize(OutputSize), Output(OutputSize){}; - - const size_t OutputSize; - std::vector Output; - }; - - using TFGraphPtr = std::unique_ptr; - using TFSessionOptionsPtr = - std::unique_ptr; - using TFStatusPtr = std::unique_ptr; - - TFModelEvaluator(StringRef SavedModelPath, - const std::vector &InputNames, - const std::vector &OutputNames, - const char *Tags = "serve"); - ~TFModelEvaluator(); - TFModelEvaluator(const TFModelEvaluator &) = delete; - TFModelEvaluator(TFModelEvaluator &&) = delete; - - /// Evaluate the model, assuming it is valid. Returns None if the evaluation - /// fails or the model is invalid, or an EvaluationResult otherwise. The - /// inputs are assumed to have been already provided via getInput(). When - /// returning None, it also marks the object invalid. Pass an Output vector - /// with the same size as OutputNames, but with nullptr values. evaluate() - /// will populate it with tensors, matching in index the corresponding - /// OutputNames. The caller is responsible for the deallocation of those - /// tensors, using TF_DeleteTensor. - Optional evaluate(); - - /// Provides access to the input vector. It is already dimensioned correctly, - /// but the values need to be allocated by the user. - std::vector &getInput() { return Input; } - - /// Returns true if the tensorflow model was loaded successfully, false - /// otherwise. 
- bool isValid() const { return !!Session; } - - /// Initialize the input at Index as a tensor of the given type and dimensions - void initInput(int Index, TF_DataType Type, - const std::vector &Dimensions); - -private: - /// The objects necessary for carrying out an evaluation of the SavedModel. - /// They are expensive to set up, and we maintain them accross all the - /// evaluations of the model. - TF_Session *Session = nullptr; - TFGraphPtr Graph; - TFSessionOptionsPtr Options; - - /// The specification of the input nodes. - std::vector InputFeed; - - /// The input tensors. They must match by index of the corresponding InputFeed - /// value. We set up the tensors once and just mutate theirs scalars before - /// each evaluation. The input tensors keep their value after an evaluation. - std::vector Input; - - /// The specification of the output nodes. When evaluating, the tensors in the - /// output tensor vector must match by index the corresponding element in the - /// OutputFeed. - std::vector OutputFeed; - - /// Reusable utility for deleting the session. - void deleteSession(); - - /// Reusable utility for ensuring we can bind the requested Name to a node in - /// the SavedModel Graph. 
- bool checkReportAndReset(const TF_Output &Output, StringRef Name); -}; -} // namespace llvm - -#endif // LLVM_ANALYSIS_UTILS_TFUTILS_H \ No newline at end of file diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 703623396d96a..a317579ecc836 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -1,35 +1,17 @@ set(CommonMLSources MLInlineAdvisor.cpp) set(ReleaseModeMLSources ReleaseModeModelRunner.cpp) -set(DevelopmentModeMLSources TFUtils.cpp) -if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API) - set(MLPolicySources ${CommonMLSources}) - if (DEFINED LLVM_HAVE_TF_AOT) - include(TensorFlowCompile) - tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel) - list(APPEND ReleaseModeMLSources - $ - ${GENERATED_OBJS} - ) - LIST(APPEND MLPolicySources ${ReleaseModeMLSources}) - else() - LIST(APPEND LLVM_OPTIONAL_SOURCES ${ReleaseModeMLSources}) - endif() - - if (DEFINED LLVM_HAVE_TF_API) - LIST(APPEND MLPolicySources ${DevelopmentModeMLSources}) - LIST(APPEND MLLinkDeps ${tensorflow_c_api}) - else() - LIST(APPEND LLVM_OPTIONAL_SOURCES ${DevelopmentModeMLSources}) - endif() +if (DEFINED LLVM_HAVE_TF_AOT) + include(TensorFlowCompile) + tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel) + list(APPEND ReleaseModeMLSources + $ + ${GENERATED_OBJS} + ) + set(MLPolicySources ${CommonMLSources} ${ReleaseModeMLSources}) else() - LIST(APPEND LLVM_OPTIONAL_SOURCES - ${CommonMLSources} - ${DevelopmentModeMLSources} - ${ReleaseModeMLSources} - ) + set(LLVM_OPTIONAL_SOURCES ${CommonMLSources} ${ReleaseModeMLSources}) endif() - add_llvm_component_library(LLVMAnalysis AliasAnalysis.cpp @@ -75,7 +57,6 @@ add_llvm_component_library(LLVMAnalysis InlineCost.cpp InlineAdvisor.cpp InlineFeaturesAnalysis.cpp - InlineSizeEstimatorAnalysis.cpp InstCount.cpp InstructionPrecedenceTracking.cpp InstructionSimplify.cpp @@ -143,7 +124,4 @@ 
add_llvm_component_library(LLVMAnalysis DEPENDS intrinsics_gen - - LINK_LIBS - ${MLLinkDeps} ) diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp deleted file mode 100644 index 1d1952ae6cbbe..0000000000000 --- a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp +++ /dev/null @@ -1,299 +0,0 @@ -//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This implements feature and label extraction for offline supervised learning -// of a IR to native size model. -// -//===----------------------------------------------------------------------===// -#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" - -#ifdef LLVM_HAVE_TF_API -#include "llvm/Analysis/Utils/TFUtils.h" -#endif -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Function.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/PassManager.h" -#include "llvm/MC/MCAsmLayout.h" -#include "llvm/Support/Casting.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" - -#include -#include - -using namespace llvm; - -AnalysisKey InlineSizeEstimatorAnalysis::Key; - -#define DEBUG_TYPE "inline-size-estimator" - -#ifdef LLVM_HAVE_TF_API -cl::opt TFIR2NativeModelPath( - "ml-inliner-ir2native-model", cl::Hidden, - cl::desc("Path to saved model evaluating native size from IR.")); - -namespace { -unsigned getMaxInstructionID() { -#define LAST_OTHER_INST(NR) return NR; -#include "llvm/IR/Instruction.def" -} - -class IRToNativeSizeLearning { -public: - enum class NamedFeatureIndex : size_t { - 
InitialSize, - Blocks, - Calls, - IsLocal, - IsLinkOnceODR, - IsLinkOnce, - Loops, - MaxLoopDepth, - MaxDomTreeLevel, - - NumNamedFeatures - }; - static const size_t NumNamedFeatures = - static_cast(NamedFeatureIndex::NumNamedFeatures); - struct FunctionFeatures { - static std::vector> - ImportantInstructionSuccessions; - static const size_t FeatureCount; - - std::array NamedFeatures = {0}; - std::vector InstructionHistogram; - std::vector InstructionPairHistogram; - - void fillTensor(int32_t *Ptr) const; - int32_t &operator[](NamedFeatureIndex Pos) { - return NamedFeatures[static_cast(Pos)]; - } - }; - IRToNativeSizeLearning() = default; - - static FunctionFeatures getFunctionFeatures(Function &F, - FunctionAnalysisManager &FAM); - -private: - /// Sort once the feature tuples. - struct SortFeatureTuples { - bool IsSorted = false; - SortFeatureTuples() { - std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(), - FunctionFeatures::ImportantInstructionSuccessions.end()); - IsSorted = true; - } - }; - - static llvm::ManagedStatic TupleSorter; - - static bool ensureSortedTuples() { return TupleSorter->IsSorted; } -}; -llvm::ManagedStatic - IRToNativeSizeLearning::TupleSorter; - -// This is a point in time - we determined including these pairs of -// consecutive instructions (in the IR layout available at inline time) as -// features improves the model performance. We want to move away from manual -// feature selection. 
-// The vector is given in opcode pairs rather than labels because 1) labels -// weren't readily available, and 2) the successions were hand - extracted -std::vector> - IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions = - {{1, 34}, {15, 27}, {53, 53}, {53, 34}, {1, 11}, {32, 2}, {2, 48}, - {28, 48}, {1, 45}, {49, 32}, {57, 56}, {55, 53}, {1, 28}, {57, 34}, - {1, 1}, {32, 28}, {32, 15}, {49, 28}, {53, 1}, {2, 53}, {48, 34}, - {28, 53}, {2, 32}, {1, 40}, {32, 48}, {29, 56}, {56, 32}, {55, 56}, - {48, 56}, {1, 31}, {33, 34}, {2, 28}, {1, 12}, {55, 1}, {31, 31}, - {65, 1}, {33, 56}, {32, 32}, {13, 13}, {1, 26}, {13, 26}, {2, 1}, - {1, 33}, {47, 49}, {64, 1}, {2, 38}, {34, 53}, {48, 2}, {55, 34}, - {34, 32}, {1, 5}, {56, 13}, {2, 2}, {2, 49}, {33, 2}, {49, 39}, - {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29}, - {47, 15}, {13, 34}, {2, 33}, {32, 49}, {49, 34}, {56, 33}, {1, 30}, - {33, 33}, {31, 33}, {2, 29}, {56, 7}, {32, 13}, {2, 55}, {56, 56}, - {2, 34}, {1, 42}, {34, 49}, {1, 20}, {32, 33}, {1, 25}, {53, 28}, - {1, 14}, {31, 49}, {28, 2}, {2, 13}, {2, 56}, {1, 32}, {56, 53}, - {65, 65}, {33, 53}, {64, 64}, {13, 2}, {34, 33}, {1, 4}, {49, 2}, - {1, 9}, {56, 1}, {33, 1}, {53, 57}, {32, 53}, {13, 56}, {32, 56}, - {55, 55}, {1, 18}, {49, 56}, {34, 34}, {1, 7}, {56, 64}, {32, 1}, - {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32}, - {32, 40}, {1, 29}, {53, 2}, {34, 1}, {32, 34}, {49, 49}, {1, 24}, - {40, 34}, {1, 13}, {38, 34}, {29, 2}, {34, 2}, {1, 39}, {1, 22}, - {1, 27}, {49, 1}, {1, 8}, {56, 2}}; - -// We have: 9 calculated features (the features here); 1 feature for each -// instruction opcode; and 1 feature for each manually-identified sequence. -// For the latter 2, we build a histogram: we count the number of -// occurrences of each instruction opcode or succession of instructions, -// respectively. -// Note that instruction opcodes start from 1. 
For convenience, we also have an -// always 0 feature for the '0' opcode, hence the extra 1. -const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = - IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions - .size() + - getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures; - -size_t getSize(Function &F, TargetTransformInfo &TTI) { - size_t Ret = 0; - for (auto &BB : F) - for (auto &I : BB) - Ret += TTI.getInstructionCost( - &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize); - return Ret; -} - -size_t getSize(Function &F, FunctionAnalysisManager &FAM) { - auto &TTI = FAM.getResult(F); - return getSize(F, TTI); -} - -unsigned getMaxDominatorTreeDepth(const Function &F, - const DominatorTree &Tree) { - unsigned Ret = 0; - for (auto &BB : F) - if (auto *TN = Tree.getNode(&BB)) - Ret = std::max(Ret, TN->getLevel()); - return Ret; -} -} // namespace - -IRToNativeSizeLearning::FunctionFeatures -IRToNativeSizeLearning::getFunctionFeatures(Function &F, - FunctionAnalysisManager &FAM) { - assert(ensureSortedTuples() && "expected lazy initialization"); - - auto &DomTree = FAM.getResult(F); - FunctionFeatures FF; - size_t InstrCount = getMaxInstructionID() + 1; - FF.InstructionHistogram.resize(InstrCount); - - FF.InstructionPairHistogram.resize( - FunctionFeatures::ImportantInstructionSuccessions.size()); - - auto StartID = 0; - auto LastID = StartID; - auto getPairIndex = [](size_t a, size_t b) { - auto I = - std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(), - FunctionFeatures::ImportantInstructionSuccessions.end(), - std::make_pair(a, b)); - if (I == FunctionFeatures::ImportantInstructionSuccessions.end()) - return -1; - return static_cast(std::distance( - FunctionFeatures::ImportantInstructionSuccessions.begin(), I)); - }; - - // We don't want debug calls, because they'd just add noise. 
- for (auto &BB : F) { - for (auto I = BB.instructionsWithoutDebug().begin(), - E = BB.instructionsWithoutDebug().end(); - I != E; ++I) { - auto ID = I->getOpcode(); - - ++FF.InstructionHistogram[ID]; - int PairIndex = getPairIndex(LastID, ID); - if (PairIndex >= 0) - ++FF.InstructionPairHistogram[PairIndex]; - LastID = ID; - if (isa(*I)) - ++FF[NamedFeatureIndex::Calls]; - } - } - - FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); - FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); - FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); - FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); - FF[NamedFeatureIndex::Blocks] = - std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end()); - auto &LI = FAM.getResult(F); - FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); - for (auto &L : LI) - FF[NamedFeatureIndex::MaxLoopDepth] = - std::max(FF[NamedFeatureIndex::MaxLoopDepth], - static_cast(L->getLoopDepth())); - FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); - return FF; -} - -void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { - std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); - Ptr += NamedFeatures.size(); - std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); - Ptr += InstructionHistogram.size(); - std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), - Ptr); -} - -bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { - return !TFIR2NativeModelPath.empty(); -} - -InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { - if (!isEvaluatorRequested()) { - return; - } - std::vector InputNames{"serving_default_input_1"}; - std::vector OutputName{"StatefulPartitionedCall"}; - Evaluator = std::make_unique( - TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName); - if (!Evaluator || !Evaluator->isValid()) { - Evaluator.reset(); - return; - } - static const std::vector Dim{ - 1, 
static_cast( - IRToNativeSizeLearning::FunctionFeatures::FeatureCount)}; - - Evaluator->initInput(0, TF_INT32, Dim); -} - -InlineSizeEstimatorAnalysis::Result -InlineSizeEstimatorAnalysis::run(const Function &F, - FunctionAnalysisManager &FAM) { - if (!Evaluator) - return None; - auto Features = IRToNativeSizeLearning::getFunctionFeatures( - const_cast(F), FAM); - int32_t *V = static_cast(TF_TensorData(Evaluator->getInput()[0])); - Features.fillTensor(V); - auto ER = Evaluator->evaluate(); - if (!ER) - return None; - float Ret = *ER->getTensorValue(0); - if (Ret < 0.0) - Ret = 0.0; - return static_cast(Ret); -} - -InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} -InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( - InlineSizeEstimatorAnalysis &&Other) - : Evaluator(std::move(Other.Evaluator)) {} - -#else -namespace llvm { -class TFModelEvaluator {}; -} // namespace llvm -InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {} -InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( - InlineSizeEstimatorAnalysis &&) {} -InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} -InlineSizeEstimatorAnalysis::Result -InlineSizeEstimatorAnalysis::run(const Function &F, - FunctionAnalysisManager &FAM) { - return None; -} -bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } -#endif \ No newline at end of file diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp deleted file mode 100644 index 6cd5b5c9b4eae..0000000000000 --- a/llvm/lib/Analysis/TFUtils.cpp +++ /dev/null @@ -1,143 +0,0 @@ -//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements utilities for interfacing with tensorflow C APIs. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/Utils/TFUtils.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/raw_ostream.h" - -#include "tensorflow/c/c_api_experimental.h" - -#include - -using namespace llvm; - -namespace { - -struct TFInitializer { - TFInitializer() { - assert(!IsInitialized && "TFInitialized should be called only once"); - int Argc = 1; - const char *Name = ""; - const char **NamePtr = &Name; - TF_InitMain(Name, &Argc, const_cast(&NamePtr)); - IsInitialized = true; - } - bool IsInitialized = false; -}; - -llvm::ManagedStatic TFLibInitializer; - -bool ensureInitTF() { return TFLibInitializer->IsInitialized; } - -TFModelEvaluator::TFGraphPtr createTFGraph() { - return TFModelEvaluator::TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph); -} - -TFModelEvaluator::TFStatusPtr createTFStatus() { - return TFModelEvaluator::TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus); -} - -TFModelEvaluator::TFSessionOptionsPtr createTFSessionOptions() { - return TFModelEvaluator::TFSessionOptionsPtr(TF_NewSessionOptions(), - &TF_DeleteSessionOptions); -} -} // namespace - -TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath, - const std::vector &InputNames, - const std::vector &OutputNames, - const char *Tags) - : Graph(createTFGraph()), Options(createTFSessionOptions()), - InputFeed(InputNames.size()), Input(InputNames.size()), - OutputFeed(OutputNames.size()) { - if (!ensureInitTF()) { - errs() << "Tensorflow should have been initialized"; - return; - } - auto Status = createTFStatus(); - - Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr, - SavedModelPath.str().c_str(), &Tags, 1, - Graph.get(), nullptr, Status.get()); - if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { - errs() << TF_Message(Status.get()); - deleteSession(); - } - for (size_t I = 0; I < InputNames.size(); ++I) { - InputFeed[I] = { - 
TF_GraphOperationByName(Graph.get(), (InputNames[I]).c_str()), 0}; - if (!checkReportAndReset(InputFeed[I], InputNames[I])) - return; - } - for (size_t I = 0; I < OutputNames.size(); ++I) { - OutputFeed[I] = { - TF_GraphOperationByName(Graph.get(), (OutputNames[I]).c_str()), 0}; - if (!checkReportAndReset(OutputFeed[I], OutputNames[I])) - return; - } -} - -TFModelEvaluator::~TFModelEvaluator() { - for (auto *T : Input) { - TF_DeleteTensor(T); - } - deleteSession(); -} - -bool TFModelEvaluator::checkReportAndReset(const TF_Output &Output, - StringRef Name) { - if (Output.oper) - return true; - errs() << "Could not find TF_Output named: " + Name; - deleteSession(); - return false; -} - -void TFModelEvaluator::deleteSession() { - if (Session == nullptr) - return; - auto Status = createTFStatus(); - TF_DeleteSession(Session, Status.get()); - Session = nullptr; - if (TF_GetCode(Status.get()) != TF_Code::TF_OK) - errs() << "Could not delete TF session"; -} - -Optional TFModelEvaluator::evaluate() { - if (!isValid()) - return None; - EvaluationResult Ret(OutputFeed.size()); - auto Status = createTFStatus(); - TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(), Input.size(), - OutputFeed.data(), Ret.Output.data(), Ret.Output.size(), - nullptr, 0, nullptr, Status.get()); - if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { - errs() << TF_Message(Status.get()); - deleteSession(); - return None; - } - return Ret; -} - -void TFModelEvaluator::initInput(int Index, TF_DataType Type, - const std::vector &Dimensions) { - int64_t TotalSize = TF_DataTypeSize(Type); - for (auto &D : Dimensions) - TotalSize *= D; - - Input[Index] = - TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize); - std::memset(TF_TensorData(Input[Index]), 0, TotalSize); -} \ No newline at end of file diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 537d300fee557..53158e7aabab0 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ 
b/llvm/lib/Passes/PassBuilder.cpp @@ -35,7 +35,6 @@ #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineFeaturesAnalysis.h" -#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index dfdfc3d05976a..eb2b740db5612 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -133,7 +133,6 @@ FUNCTION_ANALYSIS("loops", LoopAnalysis()) FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis()) FUNCTION_ANALYSIS("da", DependenceAnalysis()) FUNCTION_ANALYSIS("inliner-features", InlineFeaturesAnalysis()) -FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis()) FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis()) FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis()) FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis()) diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt index 59ad444d32fb4..42f7dd3c06101 100644 --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -6,13 +6,7 @@ set(LLVM_LINK_COMPONENTS TransformUtils ) -if (DEFINED LLVM_HAVE_TF_API) - LIST(APPEND EXTRA_TESTS TFUtilsTest.cpp) -else() - LIST(APPEND LLVM_OPTIONAL_SOURCES TFUtilsTest.cpp) -endif() - -add_llvm_unittest_with_input_files(AnalysisTests +add_llvm_unittest(AnalysisTests AliasAnalysisTest.cpp AliasSetTrackerTest.cpp AssumeBundleQueriesTest.cpp @@ -28,7 +22,6 @@ add_llvm_unittest_with_input_files(AnalysisTests DomTreeUpdaterTest.cpp GlobalsModRefTest.cpp InlineFeaturesAnalysisTest.cpp - InlineSizeEstimatorAnalysisTest.cpp IVDescriptorsTest.cpp LazyCallGraphTest.cpp LoadsTest.cpp @@ -47,7 +40,4 @@ add_llvm_unittest_with_input_files(AnalysisTests ValueLatticeTest.cpp ValueTrackingTest.cpp VectorUtilsTest.cpp - 
${EXTRA_TESTS} ) - - target_link_libraries(AnalysisTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp b/llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp deleted file mode 100644 index 377590be016ac..0000000000000 --- a/llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp +++ /dev/null @@ -1,101 +0,0 @@ -//===- InlineSizeEstimatorAnalysisTest.cpp - test for ir2native -----------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/TargetLibraryInfo.h" -#include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/AsmParser/Parser.h" -#include "llvm/IR/Dominators.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Path.h" -#include "llvm/Support/SourceMgr.h" -#include "llvm/Testing/Support/SupportHelpers.h" -#include "gtest/gtest.h" - -using namespace llvm; - -extern const char *TestMainArgv0; -extern cl::opt TFIR2NativeModelPath; - -#if LLVM_HAVE_TF_API -static std::string getModelPath() { - SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0); - llvm::sys::path::append(InputsDir, "ir2native_x86_64_model"); - return std::string(InputsDir); -} -#endif - -static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { - SMDiagnostic Err; - std::unique_ptr Mod = parseAssemblyString(IR, Err, C); - if (!Mod) - Err.print("MLAnalysisTests", errs()); - return Mod; -} - -static FunctionAnalysisManager buildFAM() { - FunctionAnalysisManager FAM; - FAM.registerPass([&] { return DominatorTreeAnalysis(); }); - 
FAM.registerPass([&] { return PassInstrumentationAnalysis(); }); - FAM.registerPass([&] { return TargetIRAnalysis(); }); - FAM.registerPass([&] { return LoopAnalysis(); }); - return FAM; -} - -// Test model loading and evaluation. -TEST(InlineSizeEstimatorAnalysis, SizeIsValidTest) { - LLVMContext C; - std::unique_ptr M = parseIR(C, - R"IR( -target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" -target triple = "x86_64-pc-linux-gnu" - -declare i32 @f1(i32) -declare i32 @f2(i32) - -define i32 @branches(i32) { - %cond = icmp slt i32 %0, 3 - br i1 %cond, label %then, label %else - -then: - %ret.1 = call i32 @f1(i32 %0) - br label %last.block - -else: - %ret.2 = call i32 @f2(i32 %0) - br label %last.block - -last.block: - %ret = phi i32 [%ret.1, %then], [%ret.2, %else] - ret i32 %ret -} - -define internal i32 @top() { - %1 = call i32 @branches(i32 2) - %2 = call i32 @f1(i32 %1) - ret i32 %2 -} -)IR"); - - FunctionAnalysisManager FAM = buildFAM(); -#if LLVM_HAVE_TF_API - TFIR2NativeModelPath = getModelPath(); -#endif - - InlineSizeEstimatorAnalysis FA; - auto SizeEstimate = FA.run(*M->getFunction("branches"), FAM); -#if LLVM_HAVE_TF_API - EXPECT_GT(*SizeEstimate, 0); -#else - EXPECT_FALSE(SizeEstimate.hasValue()); -#endif -} diff --git a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt deleted file mode 100644 index 6efdad51083d3..0000000000000 --- a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt +++ /dev/null @@ -1,10596 +0,0 @@ -saved_model_schema_version: 1 -meta_graphs { - meta_info_def { - stripped_op_list { - op { - name: "Const" - output_arg { - name: "output" - type_attr: "dtype" - } - attr { - name: "value" - type: "tensor" - } - attr { - name: "dtype" - type: "type" - } - } - op { - name: "NoOp" - } - op { - name: "Placeholder" - output_arg { - name: "output" - type_attr: "dtype" - } - attr { - name: "dtype" - type: "type" - } - attr 
{ - name: "shape" - type: "shape" - default_value { - shape { - unknown_rank: true - } - } - } - } - op { - name: "ReadVariableOp" - input_arg { - name: "resource" - type: DT_RESOURCE - } - output_arg { - name: "value" - type_attr: "dtype" - } - attr { - name: "dtype" - type: "type" - } - is_stateful: true - } - op { - name: "StatefulPartitionedCall" - input_arg { - name: "args" - type_list_attr: "Tin" - } - output_arg { - name: "output" - type_list_attr: "Tout" - } - attr { - name: "Tin" - type: "list(type)" - has_minimum: true - } - attr { - name: "Tout" - type: "list(type)" - has_minimum: true - } - attr { - name: "f" - type: "func" - } - attr { - name: "config" - type: "string" - default_value { - s: "" - } - } - attr { - name: "config_proto" - type: "string" - default_value { - s: "" - } - } - attr { - name: "executor_type" - type: "string" - default_value { - s: "" - } - } - is_stateful: true - } - op { - name: "VarHandleOp" - output_arg { - name: "resource" - type: DT_RESOURCE - } - attr { - name: "container" - type: "string" - default_value { - s: "" - } - } - attr { - name: "shared_name" - type: "string" - default_value { - s: "" - } - } - attr { - name: "dtype" - type: "type" - } - attr { - name: "shape" - type: "shape" - } - is_stateful: true - } - } - tags: "serve" - tensorflow_version: "1.15.0" - tensorflow_git_version: "unknown" - stripped_default_attrs: true - } - graph_def { - node { - name: "dense/kernel" - op: "VarHandleOp" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - attr { - key: "shape" - value { - shape { - dim { - size: 214 - } - dim { - size: 100 - } - } - } - } - attr { - key: "shared_name" - value { - s: "dense/kernel" - } - } - } - node { - name: "dense/kernel/Read/ReadVariableOp" - op: "ReadVariableOp" - input: "dense/kernel" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 214 - } - dim { - size: 100 - } - } - } - } 
- } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - } - node { - name: "dense/bias" - op: "VarHandleOp" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - attr { - key: "shape" - value { - shape { - dim { - size: 100 - } - } - } - } - attr { - key: "shared_name" - value { - s: "dense/bias" - } - } - } - node { - name: "dense/bias/Read/ReadVariableOp" - op: "ReadVariableOp" - input: "dense/bias" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - } - node { - name: "dense_1/kernel" - op: "VarHandleOp" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - attr { - key: "shape" - value { - shape { - dim { - size: 100 - } - dim { - size: 1 - } - } - } - } - attr { - key: "shared_name" - value { - s: "dense_1/kernel" - } - } - } - node { - name: "dense_1/kernel/Read/ReadVariableOp" - op: "ReadVariableOp" - input: "dense_1/kernel" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - } - node { - name: "dense_1/bias" - op: "VarHandleOp" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - attr { - key: "shape" - value { - shape { - dim { - size: 1 - } - } - } - } - attr { - key: "shared_name" - value { - s: "dense_1/bias" - } - } - } - node { - name: "dense_1/bias/Read/ReadVariableOp" - op: "ReadVariableOp" - input: "dense_1/bias" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - } - node { - name: "total" - op: "VarHandleOp" - attr { - key: "_output_shapes" 
- value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - attr { - key: "shape" - value { - shape { - } - } - } - attr { - key: "shared_name" - value { - s: "total" - } - } - } - node { - name: "total/Read/ReadVariableOp" - op: "ReadVariableOp" - input: "total" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - } - node { - name: "count" - op: "VarHandleOp" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - attr { - key: "shape" - value { - shape { - } - } - } - attr { - key: "shared_name" - value { - s: "count" - } - } - } - node { - name: "count/Read/ReadVariableOp" - op: "ReadVariableOp" - input: "count" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - } - node { - name: "total_1" - op: "VarHandleOp" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - attr { - key: "shape" - value { - shape { - } - } - } - attr { - key: "shared_name" - value { - s: "total_1" - } - } - } - node { - name: "total_1/Read/ReadVariableOp" - op: "ReadVariableOp" - input: "total_1" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - } - node { - name: "count_1" - op: "VarHandleOp" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - attr { - key: "shape" - value { - shape { - } - } - } - attr { - key: "shared_name" - value { - s: "count_1" - } - } - } - node { - name: "count_1/Read/ReadVariableOp" - op: "ReadVariableOp" - input: "count_1" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { 
- key: "dtype" - value { - type: DT_FLOAT - } - } - } - node { - name: "NoOp" - op: "NoOp" - } - node { - name: "Const" - op: "Const" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_STRING - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_STRING - tensor_shape { - } - string_val: "\n\277\001\n\030\010\001\022\024layer_with_weights-0\n\013\010\001\022\007layer-0\n\030\010\002\022\024layer_with_weights-1\n\013\010\002\022\007layer-1\n\r\010\003\022\toptimizer\n\031\010\004\022\025regularization_losses\n\r\010\005\022\tvariables\n\027\010\006\022\023trainable_variables\n\r\010\007\022\tkeras_api\n\016\010\010\022\nsignatures\nh\n\n\010\t\022\006kernel\n\010\010\n\022\004bias\n\031\010\013\022\025regularization_losses\n\r\010\014\022\tvariables\n\027\010\r\022\023trainable_variables\n\r\010\016\022\tkeras_api\nh\n\n\010\017\022\006kernel\n\010\010\020\022\004bias\n\031\010\021\022\025regularization_losses\n\r\010\022\022\tvariables\n\027\010\023\022\023trainable_variables\n\r\010\024\022\tkeras_api\n\000\n\000\n\034\n\005\010\t\022\0010\n\005\010\n\022\0011\n\005\010\017\022\0012\n\005\010\020\022\0013\n\034\n\005\010\t\022\0010\n\005\010\n\022\0011\n\005\010\017\022\0012\n\005\010\020\022\0013\n\255\001\n\n\010\025\022\006layers\n\037\010\026\022\033layer_regularization_losses\n\033\010\027\022\027non_trainable_variables\n\021\010\030\022\rlayer_metrics\n\031\010\004\022\025regularization_losses\n\013\010\031\022\007metrics\n\r\010\005\022\tvariables\n\027\010\006\022\023trainable_variables\n\000\nX\022V\n\016VARIABLE_VALUE\022\014dense/kernel\0326layer_with_weights-0/kernel/.ATTRIBUTES/VARIABLE_VALUE\nT\022R\n\016VARIABLE_VALUE\022\ndense/bias\0324layer_with_weights-0/bias/.ATTRIBUTES/VARIABLE_VALUE\n\000\n\016\n\005\010\t\022\0010\n\005\010\n\022\0011\n\016\n\005\010\t\022\0010\n\005\010\n\022\0011\n\255\001\n\n\010\032\022\006layers\n\037\010\033\022\0
33layer_regularization_losses\n\033\010\034\022\027non_trainable_variables\n\021\010\035\022\rlayer_metrics\n\031\010\013\022\025regularization_losses\n\013\010\036\022\007metrics\n\r\010\014\022\tvariables\n\027\010\r\022\023trainable_variables\nZ\022X\n\016VARIABLE_VALUE\022\016dense_1/kernel\0326layer_with_weights-1/kernel/.ATTRIBUTES/VARIABLE_VALUE\nV\022T\n\016VARIABLE_VALUE\022\014dense_1/bias\0324layer_with_weights-1/bias/.ATTRIBUTES/VARIABLE_VALUE\n\000\n\016\n\005\010\017\022\0010\n\005\010\020\022\0011\n\016\n\005\010\017\022\0010\n\005\010\020\022\0011\n\255\001\n\n\010\037\022\006layers\n\037\010 \022\033layer_regularization_losses\n\033\010!\022\027non_trainable_variables\n\021\010\"\022\rlayer_metrics\n\031\010\021\022\025regularization_losses\n\013\010#\022\007metrics\n\r\010\022\022\tvariables\n\027\010\023\022\023trainable_variables\n\016\n\005\010\001\022\0010\n\005\010\002\022\0011\n\000\n\000\n\000\n\016\n\005\010$\022\0010\n\005\010%\022\0011\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n4\n\t\010&\022\005total\n\t\010\'\022\005count\n\r\010(\022\tvariables\n\r\010)\022\tkeras_api\nD\n\t\010*\022\005total\n\t\010+\022\005count\n\016\010,\022\n_fn_kwargs\n\r\010-\022\tvariables\n\r\010.\022\tkeras_api\nO\022M\n\016VARIABLE_VALUE\022\005total\0324keras_api/metrics/0/total/.ATTRIBUTES/VARIABLE_VALUE\nO\022M\n\016VARIABLE_VALUE\022\005count\0324keras_api/metrics/0/count/.ATTRIBUTES/VARIABLE_VALUE\n\016\n\005\010&\022\0010\n\005\010\'\022\0011\n\017\n\r\010(\022\tvariables\nQ\022O\n\016VARIABLE_VALUE\022\007total_1\0324keras_api/metrics/1/total/.ATTRIBUTES/VARIABLE_VALUE\nQ\022O\n\016VARIABLE_VALUE\022\007count_1\0324keras_api/metrics/1/count/.ATTRIBUTES/VARIABLE_VALUE\n\000\n\016\n\005\010*\022\0010\n\005\010+\022\0011\n\017\n\r\010-\022\tvariables" - } - } - } - } - node { - name: "serving_default_input_1" - op: "Placeholder" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } 
- } - } - } - } - attr { - key: "dtype" - value { - type: DT_INT32 - } - } - attr { - key: "shape" - value { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - node { - name: "StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "serving_default_input_1" - input: "dense/kernel" - input: "dense/bias" - input: "dense_1/kernel" - input: "dense_1/bias" - attr { - key: "Tin" - value { - list { - type: DT_INT32 - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - i: 3 - i: 4 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_signature_wrapper_6671" - } - } - } - } - node { - name: "saver_filename" - op: "Placeholder" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_STRING - } - } - attr { - key: "shape" - value { - shape { - } - } - } - } - node { - name: "StatefulPartitionedCall_1" - op: "StatefulPartitionedCall" - input: "saver_filename" - input: "dense/kernel/Read/ReadVariableOp" - input: "dense/bias/Read/ReadVariableOp" - input: "dense_1/kernel/Read/ReadVariableOp" - input: "dense_1/bias/Read/ReadVariableOp" - input: "total/Read/ReadVariableOp" - input: "count/Read/ReadVariableOp" - input: "total_1/Read/ReadVariableOp" - input: "count_1/Read/ReadVariableOp" - input: "Const" - attr { - key: "Tin" - value { - list { - type: DT_STRING - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: 
DT_FLOAT - type: DT_FLOAT - type: DT_STRING - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_STRING - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference__traced_save_6824" - } - } - } - } - node { - name: "StatefulPartitionedCall_2" - op: "StatefulPartitionedCall" - input: "saver_filename" - input: "dense/kernel" - input: "dense/bias" - input: "dense_1/kernel" - input: "dense_1/bias" - input: "total" - input: "count" - input: "total_1" - input: "count_1" - attr { - key: "Tin" - value { - list { - type: DT_STRING - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_STRING - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference__traced_restore_6860" - } - } - } - } - library { - function { - signature { - name: "__inference__traced_restore_6860" - input_arg { - name: "file_prefix" - type: DT_STRING - } - input_arg { - name: "assignvariableop_dense_kernel" - type: DT_RESOURCE - } - input_arg { - name: "assignvariableop_1_dense_bias" - type: DT_RESOURCE - } - input_arg { - name: "assignvariableop_2_dense_1_kernel" - type: DT_RESOURCE - } - input_arg { - name: 
"assignvariableop_3_dense_1_bias" - type: DT_RESOURCE - } - input_arg { - name: "assignvariableop_4_total" - type: DT_RESOURCE - } - input_arg { - name: "assignvariableop_5_count" - type: DT_RESOURCE - } - input_arg { - name: "assignvariableop_6_total_1" - type: DT_RESOURCE - } - input_arg { - name: "assignvariableop_7_count_1" - type: DT_RESOURCE - } - output_arg { - name: "identity_9" - type: DT_STRING - } - is_stateful: true - control_output: "AssignVariableOp" - control_output: "AssignVariableOp_1" - control_output: "AssignVariableOp_2" - control_output: "AssignVariableOp_3" - control_output: "AssignVariableOp_4" - control_output: "AssignVariableOp_5" - control_output: "AssignVariableOp_6" - control_output: "AssignVariableOp_7" - control_output: "RestoreV2" - control_output: "RestoreV2_1" - } - node_def { - name: "RestoreV2/tensor_names" - op: "Const" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 8 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_STRING - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_STRING - tensor_shape { - dim { - size: 8 - } - } - string_val: "layer_with_weights-0/kernel/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "layer_with_weights-0/bias/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "layer_with_weights-1/kernel/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "layer_with_weights-1/bias/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "keras_api/metrics/0/total/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "keras_api/metrics/0/count/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "keras_api/metrics/1/total/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "keras_api/metrics/1/count/.ATTRIBUTES/VARIABLE_VALUE" - } - } - } - experimental_debug_info { - original_node_names: "RestoreV2/tensor_names" - } - } - node_def { - name: "RestoreV2/shape_and_slices" - op: "Const" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 8 - } - } - } - } 
- } - attr { - key: "dtype" - value { - type: DT_STRING - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_STRING - tensor_shape { - dim { - size: 8 - } - } - string_val: "" - string_val: "" - string_val: "" - string_val: "" - string_val: "" - string_val: "" - string_val: "" - string_val: "" - } - } - } - experimental_debug_info { - original_node_names: "RestoreV2/shape_and_slices" - } - } - node_def { - name: "RestoreV2" - op: "RestoreV2" - input: "file_prefix" - input: "RestoreV2/tensor_names:output:0" - input: "RestoreV2/shape_and_slices:output:0" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - attr { - key: "dtypes" - value { - list { - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - } - } - } - experimental_debug_info { - original_node_names: "RestoreV2" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "RestoreV2:tensors:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - unknown_rank: true - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - node_def { - name: "AssignVariableOp" - op: "AssignVariableOp" - input: "assignvariableop_dense_kernel" - input: "Identity:output:0" - attr { - key: "_output_shapes" - value { - list { - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "AssignVariableOp" - } - } - node_def { - name: "Identity_1" - op: "Identity" - input: "RestoreV2:tensors:1" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: 
"_output_shapes" - value { - list { - shape { - unknown_rank: true - } - } - } - } - experimental_debug_info { - original_node_names: "Identity_1" - } - } - node_def { - name: "AssignVariableOp_1" - op: "AssignVariableOp" - input: "assignvariableop_1_dense_bias" - input: "Identity_1:output:0" - attr { - key: "_output_shapes" - value { - list { - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "AssignVariableOp_1" - } - } - node_def { - name: "Identity_2" - op: "Identity" - input: "RestoreV2:tensors:2" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - unknown_rank: true - } - } - } - } - experimental_debug_info { - original_node_names: "Identity_2" - } - } - node_def { - name: "AssignVariableOp_2" - op: "AssignVariableOp" - input: "assignvariableop_2_dense_1_kernel" - input: "Identity_2:output:0" - attr { - key: "_output_shapes" - value { - list { - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "AssignVariableOp_2" - } - } - node_def { - name: "Identity_3" - op: "Identity" - input: "RestoreV2:tensors:3" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - unknown_rank: true - } - } - } - } - experimental_debug_info { - original_node_names: "Identity_3" - } - } - node_def { - name: "AssignVariableOp_3" - op: "AssignVariableOp" - input: "assignvariableop_3_dense_1_bias" - input: "Identity_3:output:0" - attr { - key: "_output_shapes" - value { - list { - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "AssignVariableOp_3" - } - } - node_def { - name: "Identity_4" - op: "Identity" - input: "RestoreV2:tensors:4" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape 
{ - unknown_rank: true - } - } - } - } - experimental_debug_info { - original_node_names: "Identity_4" - } - } - node_def { - name: "AssignVariableOp_4" - op: "AssignVariableOp" - input: "assignvariableop_4_total" - input: "Identity_4:output:0" - attr { - key: "_output_shapes" - value { - list { - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "AssignVariableOp_4" - } - } - node_def { - name: "Identity_5" - op: "Identity" - input: "RestoreV2:tensors:5" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - unknown_rank: true - } - } - } - } - experimental_debug_info { - original_node_names: "Identity_5" - } - } - node_def { - name: "AssignVariableOp_5" - op: "AssignVariableOp" - input: "assignvariableop_5_count" - input: "Identity_5:output:0" - attr { - key: "_output_shapes" - value { - list { - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "AssignVariableOp_5" - } - } - node_def { - name: "Identity_6" - op: "Identity" - input: "RestoreV2:tensors:6" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - unknown_rank: true - } - } - } - } - experimental_debug_info { - original_node_names: "Identity_6" - } - } - node_def { - name: "AssignVariableOp_6" - op: "AssignVariableOp" - input: "assignvariableop_6_total_1" - input: "Identity_6:output:0" - attr { - key: "_output_shapes" - value { - list { - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "AssignVariableOp_6" - } - } - node_def { - name: "Identity_7" - op: "Identity" - input: "RestoreV2:tensors:7" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - unknown_rank: true - } - } - } - } - 
experimental_debug_info { - original_node_names: "Identity_7" - } - } - node_def { - name: "AssignVariableOp_7" - op: "AssignVariableOp" - input: "assignvariableop_7_count_1" - input: "Identity_7:output:0" - attr { - key: "_output_shapes" - value { - list { - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "AssignVariableOp_7" - } - } - node_def { - name: "RestoreV2_1/tensor_names" - op: "Const" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_STRING - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_STRING - tensor_shape { - dim { - size: 1 - } - } - string_val: "_CHECKPOINTABLE_OBJECT_GRAPH" - } - } - } - experimental_debug_info { - original_node_names: "RestoreV2_1/tensor_names" - } - } - node_def { - name: "RestoreV2_1/shape_and_slices" - op: "Const" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_STRING - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_STRING - tensor_shape { - dim { - size: 1 - } - } - string_val: "" - } - } - } - experimental_debug_info { - original_node_names: "RestoreV2_1/shape_and_slices" - } - } - node_def { - name: "RestoreV2_1" - op: "RestoreV2" - input: "file_prefix" - input: "RestoreV2_1/tensor_names:output:0" - input: "RestoreV2_1/shape_and_slices:output:0" - input: "^RestoreV2" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - unknown_rank: true - } - } - } - } - attr { - key: "dtypes" - value { - list { - type: DT_STRING - } - } - } - experimental_debug_info { - original_node_names: "RestoreV2_1" - } - } - node_def { - name: "NoOp" - op: "NoOp" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - } - } - } - 
experimental_debug_info { - original_node_names: "NoOp" - } - } - node_def { - name: "Identity_8" - op: "Identity" - input: "file_prefix" - input: "^AssignVariableOp" - input: "^AssignVariableOp_1" - input: "^AssignVariableOp_2" - input: "^AssignVariableOp_3" - input: "^AssignVariableOp_4" - input: "^AssignVariableOp_5" - input: "^AssignVariableOp_6" - input: "^AssignVariableOp_7" - input: "^NoOp" - device: "/device:CPU:0" - attr { - key: "T" - value { - type: DT_STRING - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - experimental_debug_info { - original_node_names: "Identity_8" - } - } - node_def { - name: "Identity_9" - op: "Identity" - input: "Identity_8:output:0" - input: "^AssignVariableOp" - input: "^AssignVariableOp_1" - input: "^AssignVariableOp_2" - input: "^AssignVariableOp_3" - input: "^AssignVariableOp_4" - input: "^AssignVariableOp_5" - input: "^AssignVariableOp_6" - input: "^AssignVariableOp_7" - input: "^RestoreV2" - input: "^RestoreV2_1" - attr { - key: "T" - value { - type: DT_STRING - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - experimental_debug_info { - original_node_names: "Identity_9" - } - } - ret { - key: "identity_9" - value: "Identity_9:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - control_ret { - key: "AssignVariableOp" - value: "AssignVariableOp" - } - control_ret { - key: "AssignVariableOp_1" - value: "AssignVariableOp_1" - } - control_ret { - key: "AssignVariableOp_2" - value: "AssignVariableOp_2" - } - control_ret { - key: "AssignVariableOp_3" - value: "AssignVariableOp_3" - } - control_ret { - key: "AssignVariableOp_4" - value: 
"AssignVariableOp_4" - } - control_ret { - key: "AssignVariableOp_5" - value: "AssignVariableOp_5" - } - control_ret { - key: "AssignVariableOp_6" - value: "AssignVariableOp_6" - } - control_ret { - key: "AssignVariableOp_7" - value: "AssignVariableOp_7" - } - control_ret { - key: "RestoreV2" - value: "RestoreV2" - } - control_ret { - key: "RestoreV2_1" - value: "RestoreV2_1" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "file_prefix" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 5 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 6 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 7 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 8 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_sequential_layer_call_fn_6629" - input_arg { - name: "input_1" - type: DT_INT32 - } - input_arg { - name: "unknown" - type: DT_RESOURCE - } - input_arg { - name: "unknown_0" - type: DT_RESOURCE - } - input_arg { - name: "unknown_1" - type: DT_RESOURCE - } - input_arg { - name: "unknown_2" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - control_output: 
"StatefulPartitionedCall" - } - node_def { - name: "StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "input_1" - input: "unknown" - input: "unknown_0" - input: "unknown_1" - input: "unknown_2" - attr { - key: "Tin" - value { - list { - type: DT_INT32 - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - i: 3 - i: 4 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_sequential_layer_call_and_return_conditional_losses_6618" - } - } - } - experimental_debug_info { - original_node_names: "StatefulPartitionedCall" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "StatefulPartitionedCall:output:0" - input: "^StatefulPartitionedCall" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - control_ret { - key: "StatefulPartitionedCall" - value: "StatefulPartitionedCall" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - 
} - } - } - attr { - key: "_user_specified_name" - value { - s: "input_1" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_sequential_layer_call_and_return_conditional_losses_6587" - input_arg { - name: "input_1" - type: DT_INT32 - } - input_arg { - name: "dense_6555" - type: DT_RESOURCE - } - input_arg { - name: "dense_6557" - type: DT_RESOURCE - } - input_arg { - name: "dense_1_6581" - type: DT_RESOURCE - } - input_arg { - name: "dense_1_6583" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - control_output: "dense/StatefulPartitionedCall" - control_output: "dense_1/StatefulPartitionedCall" - } - node_def { - name: "dense/StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "input_1" - input: "dense_6555" - input: "dense_6557" - attr { - key: "Tin" - value { - list { - type: DT_INT32 - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_dense_layer_call_and_return_conditional_losses_6544" - } - 
} - } - experimental_debug_info { - original_node_names: "dense/StatefulPartitionedCall" - } - } - node_def { - name: "dense_1/StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "dense/StatefulPartitionedCall:output:0" - input: "dense_1_6581" - input: "dense_1_6583" - attr { - key: "Tin" - value { - list { - type: DT_FLOAT - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570" - } - } - } - experimental_debug_info { - original_node_names: "dense_1/StatefulPartitionedCall" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "dense_1/StatefulPartitionedCall:output:0" - input: "^dense/StatefulPartitionedCall" - input: "^dense_1/StatefulPartitionedCall" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - control_ret { - key: "dense/StatefulPartitionedCall" - value: "dense/StatefulPartitionedCall" - } - control_ret { - key: 
"dense_1/StatefulPartitionedCall" - value: "dense_1/StatefulPartitionedCall" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "input_1" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_sequential_layer_call_and_return_conditional_losses_6618" - input_arg { - name: "inputs" - type: DT_INT32 - } - input_arg { - name: "dense_6607" - type: DT_RESOURCE - } - input_arg { - name: "dense_6609" - type: DT_RESOURCE - } - input_arg { - name: "dense_1_6612" - type: DT_RESOURCE - } - input_arg { - name: "dense_1_6614" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - control_output: "dense/StatefulPartitionedCall" - control_output: "dense_1/StatefulPartitionedCall" - } - node_def { - name: "dense/StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "inputs" - input: "dense_6607" - input: "dense_6609" - attr { - key: "Tin" - value { - list { - type: DT_INT32 - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - } - } - } - attr { - 
key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_dense_layer_call_and_return_conditional_losses_6544" - } - } - } - experimental_debug_info { - original_node_names: "dense/StatefulPartitionedCall" - } - } - node_def { - name: "dense_1/StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "dense/StatefulPartitionedCall:output:0" - input: "dense_1_6612" - input: "dense_1_6614" - attr { - key: "Tin" - value { - list { - type: DT_FLOAT - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570" - } - } - } - experimental_debug_info { - original_node_names: "dense_1/StatefulPartitionedCall" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "dense_1/StatefulPartitionedCall:output:0" - input: "^dense/StatefulPartitionedCall" - input: "^dense_1/StatefulPartitionedCall" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { 
- unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - control_ret { - key: "dense/StatefulPartitionedCall" - value: "dense/StatefulPartitionedCall" - } - control_ret { - key: "dense_1/StatefulPartitionedCall" - value: "dense_1/StatefulPartitionedCall" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "inputs" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_sequential_layer_call_fn_6656" - input_arg { - name: "input_1" - type: DT_INT32 - } - input_arg { - name: "unknown" - type: DT_RESOURCE - } - input_arg { - name: "unknown_0" - type: DT_RESOURCE - } - input_arg { - name: "unknown_1" - type: DT_RESOURCE - } - input_arg { - name: "unknown_2" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - control_output: "StatefulPartitionedCall" - } - node_def { - name: "StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "input_1" - input: "unknown" - input: "unknown_0" - input: "unknown_1" - input: "unknown_2" - attr { - key: "Tin" - value { - list { - type: DT_INT32 - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { 
- shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - i: 3 - i: 4 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_sequential_layer_call_and_return_conditional_losses_6645" - } - } - } - experimental_debug_info { - original_node_names: "StatefulPartitionedCall" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "StatefulPartitionedCall:output:0" - input: "^StatefulPartitionedCall" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - control_ret { - key: "StatefulPartitionedCall" - value: "StatefulPartitionedCall" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "input_1" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - 
} - } - } - function { - signature { - name: "__inference_dense_1_layer_call_and_return_conditional_losses_6764" - input_arg { - name: "inputs" - type: DT_FLOAT - } - input_arg { - name: "matmul_readvariableop_resource" - type: DT_RESOURCE - } - input_arg { - name: "biasadd_readvariableop_resource" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - } - node_def { - name: "MatMul/ReadVariableOp" - op: "ReadVariableOp" - input: "matmul_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "MatMul/ReadVariableOp" - } - } - node_def { - name: "MatMul" - op: "MatMul" - input: "inputs" - input: "MatMul/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "MatMul" - } - } - node_def { - name: "BiasAdd/ReadVariableOp" - op: "ReadVariableOp" - input: "biasadd_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "BiasAdd/ReadVariableOp" - } - } - node_def { - name: "BiasAdd" - op: "BiasAdd" - input: "MatMul:product:0" - input: "BiasAdd/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "BiasAdd" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "BiasAdd:output:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: 
"_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "inputs" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_dense_layer_call_fn_6754" - input_arg { - name: "inputs" - type: DT_INT32 - } - input_arg { - name: "unknown" - type: DT_RESOURCE - } - input_arg { - name: "unknown_0" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - control_output: "StatefulPartitionedCall" - } - node_def { - name: "StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "inputs" - input: "unknown" - input: "unknown_0" - attr { - key: "Tin" - value { - list { - type: DT_INT32 - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr 
{ - key: "f" - value { - func { - name: "__inference_dense_layer_call_and_return_conditional_losses_6544" - } - } - } - experimental_debug_info { - original_node_names: "StatefulPartitionedCall" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "StatefulPartitionedCall:output:0" - input: "^StatefulPartitionedCall" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - control_ret { - key: "StatefulPartitionedCall" - value: "StatefulPartitionedCall" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "inputs" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference__traced_save_6824" - input_arg { - name: "file_prefix" - type: DT_STRING - } - input_arg { - name: "savev2_dense_kernel_read_readvariableop" - type: DT_FLOAT - } - input_arg { - name: "savev2_dense_bias_read_readvariableop" - type: DT_FLOAT - } - input_arg { - name: "savev2_dense_1_kernel_read_readvariableop" - type: DT_FLOAT - } - input_arg { - name: "savev2_dense_1_bias_read_readvariableop" - type: DT_FLOAT - } - input_arg { - name: "savev2_total_read_readvariableop" - type: DT_FLOAT - } - input_arg { - name: "savev2_count_read_readvariableop" 
- type: DT_FLOAT - } - input_arg { - name: "savev2_total_1_read_readvariableop" - type: DT_FLOAT - } - input_arg { - name: "savev2_count_1_read_readvariableop" - type: DT_FLOAT - } - input_arg { - name: "savev2_1_const" - type: DT_STRING - } - output_arg { - name: "identity_1" - type: DT_STRING - } - is_stateful: true - control_output: "MergeV2Checkpoints" - control_output: "SaveV2" - control_output: "SaveV2_1" - } - node_def { - name: "StaticRegexFullMatch" - op: "StaticRegexFullMatch" - input: "file_prefix" - device: "/device:CPU:*" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "pattern" - value { - s: "^s3://.*" - } - } - experimental_debug_info { - original_node_names: "StaticRegexFullMatch" - } - } - node_def { - name: "Const" - op: "Const" - device: "/device:CPU:*" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_STRING - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_STRING - tensor_shape { - } - string_val: ".part" - } - } - } - experimental_debug_info { - original_node_names: "Const" - } - } - node_def { - name: "Const_1" - op: "Const" - device: "/device:CPU:*" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_STRING - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_STRING - tensor_shape { - } - string_val: "_temp_6f1e5fef49bb4c06ace07a8a95dfbb1b/part" - } - } - } - experimental_debug_info { - original_node_names: "Const_1" - } - } - node_def { - name: "Select" - op: "Select" - input: "StaticRegexFullMatch:output:0" - input: "Const:output:0" - input: "Const_1:output:0" - device: "/device:CPU:*" - attr { - key: "T" - value { - type: DT_STRING - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - experimental_debug_info { - original_node_names: "Select" - } - } - node_def { - name: "StringJoin" - op: 
"StringJoin" - input: "file_prefix" - input: "Select:output:0" - device: "/device:CPU:*" - attr { - key: "N" - value { - i: 2 - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - experimental_debug_info { - original_node_names: "StringJoin" - } - } - node_def { - name: "num_shards" - op: "Const" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_INT32 - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_INT32 - tensor_shape { - } - int_val: 2 - } - } - } - experimental_debug_info { - original_node_names: "num_shards" - } - } - node_def { - name: "ShardedFilename/shard" - op: "Const" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_INT32 - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_INT32 - tensor_shape { - } - int_val: 0 - } - } - } - experimental_debug_info { - original_node_names: "ShardedFilename/shard" - } - } - node_def { - name: "ShardedFilename" - op: "ShardedFilename" - input: "StringJoin:output:0" - input: "ShardedFilename/shard:output:0" - input: "num_shards:output:0" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - experimental_debug_info { - original_node_names: "ShardedFilename" - } - } - node_def { - name: "SaveV2/tensor_names" - op: "Const" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 8 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_STRING - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_STRING - tensor_shape { - dim { - size: 8 - } - } - string_val: "layer_with_weights-0/kernel/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "layer_with_weights-0/bias/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "layer_with_weights-1/kernel/.ATTRIBUTES/VARIABLE_VALUE" - string_val: 
"layer_with_weights-1/bias/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "keras_api/metrics/0/total/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "keras_api/metrics/0/count/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "keras_api/metrics/1/total/.ATTRIBUTES/VARIABLE_VALUE" - string_val: "keras_api/metrics/1/count/.ATTRIBUTES/VARIABLE_VALUE" - } - } - } - experimental_debug_info { - original_node_names: "SaveV2/tensor_names" - } - } - node_def { - name: "SaveV2/shape_and_slices" - op: "Const" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 8 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_STRING - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_STRING - tensor_shape { - dim { - size: 8 - } - } - string_val: "" - string_val: "" - string_val: "" - string_val: "" - string_val: "" - string_val: "" - string_val: "" - string_val: "" - } - } - } - experimental_debug_info { - original_node_names: "SaveV2/shape_and_slices" - } - } - node_def { - name: "SaveV2" - op: "SaveV2" - input: "ShardedFilename:filename:0" - input: "SaveV2/tensor_names:output:0" - input: "SaveV2/shape_and_slices:output:0" - input: "savev2_dense_kernel_read_readvariableop" - input: "savev2_dense_bias_read_readvariableop" - input: "savev2_dense_1_kernel_read_readvariableop" - input: "savev2_dense_1_bias_read_readvariableop" - input: "savev2_total_read_readvariableop" - input: "savev2_count_read_readvariableop" - input: "savev2_total_1_read_readvariableop" - input: "savev2_count_1_read_readvariableop" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - } - } - } - attr { - key: "dtypes" - value { - list { - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - type: DT_FLOAT - } - } - } - experimental_debug_info { - original_node_names: "SaveV2" - } - } - node_def { - name: "ShardedFilename_1/shard" - op: "Const" - device: "/device:CPU:0" 
- attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "dtype" - value { - type: DT_INT32 - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_INT32 - tensor_shape { - } - int_val: 1 - } - } - } - experimental_debug_info { - original_node_names: "ShardedFilename_1/shard" - } - } - node_def { - name: "ShardedFilename_1" - op: "ShardedFilename" - input: "StringJoin:output:0" - input: "ShardedFilename_1/shard:output:0" - input: "num_shards:output:0" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - experimental_debug_info { - original_node_names: "ShardedFilename_1" - } - } - node_def { - name: "SaveV2_1/tensor_names" - op: "Const" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_STRING - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_STRING - tensor_shape { - dim { - size: 1 - } - } - string_val: "_CHECKPOINTABLE_OBJECT_GRAPH" - } - } - } - experimental_debug_info { - original_node_names: "SaveV2_1/tensor_names" - } - } - node_def { - name: "SaveV2_1/shape_and_slices" - op: "Const" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_STRING - } - } - attr { - key: "value" - value { - tensor { - dtype: DT_STRING - tensor_shape { - dim { - size: 1 - } - } - string_val: "" - } - } - } - experimental_debug_info { - original_node_names: "SaveV2_1/shape_and_slices" - } - } - node_def { - name: "SaveV2_1" - op: "SaveV2" - input: "ShardedFilename_1:filename:0" - input: "SaveV2_1/tensor_names:output:0" - input: "SaveV2_1/shape_and_slices:output:0" - input: "savev2_1_const" - input: "^SaveV2" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - } - } - } - attr { - key: "dtypes" - value { - 
list { - type: DT_STRING - } - } - } - experimental_debug_info { - original_node_names: "SaveV2_1" - } - } - node_def { - name: "MergeV2Checkpoints/checkpoint_prefixes" - op: "Pack" - input: "ShardedFilename:filename:0" - input: "ShardedFilename_1:filename:0" - input: "^SaveV2" - input: "^SaveV2_1" - device: "/device:CPU:0" - attr { - key: "N" - value { - i: 2 - } - } - attr { - key: "T" - value { - type: DT_STRING - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 2 - } - } - } - } - } - experimental_debug_info { - original_node_names: "MergeV2Checkpoints/checkpoint_prefixes" - } - } - node_def { - name: "MergeV2Checkpoints" - op: "MergeV2Checkpoints" - input: "MergeV2Checkpoints/checkpoint_prefixes:output:0" - input: "file_prefix" - input: "^SaveV2_1" - device: "/device:CPU:0" - attr { - key: "_output_shapes" - value { - list { - } - } - } - experimental_debug_info { - original_node_names: "MergeV2Checkpoints" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "file_prefix" - input: "^MergeV2Checkpoints" - device: "/device:CPU:0" - attr { - key: "T" - value { - type: DT_STRING - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - node_def { - name: "Identity_1" - op: "Identity" - input: "Identity:output:0" - input: "^MergeV2Checkpoints" - input: "^SaveV2" - input: "^SaveV2_1" - attr { - key: "T" - value { - type: DT_STRING - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - experimental_debug_info { - original_node_names: "Identity_1" - } - } - ret { - key: "identity_1" - value: "Identity_1:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - } - shape { - dim { - size: 214 - } - dim { - size: 100 - } - } - shape { - dim { - size: 100 - } - } - shape { - dim { - size: 100 - } - dim { - size: 1 - } - } - shape { - dim { - size: 1 - } - } - shape { - } - 
shape { - } - shape { - } - shape { - } - shape { - } - } - } - } - control_ret { - key: "MergeV2Checkpoints" - value: "MergeV2Checkpoints" - } - control_ret { - key: "SaveV2" - value: "SaveV2" - } - control_ret { - key: "SaveV2_1" - value: "SaveV2_1" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "file_prefix" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 214 - } - dim { - size: 100 - } - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - dim { - size: 1 - } - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 1 - } - } - } - } - } - } - } - arg_attr { - key: 5 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 6 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 7 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 8 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 9 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_sequential_layer_call_and_return_conditional_losses_6689" - input_arg { - name: "inputs" - type: DT_INT32 - } - input_arg { - name: "dense_matmul_readvariableop_resource" - type: DT_RESOURCE - } - input_arg { - name: "dense_biasadd_readvariableop_resource" - type: DT_RESOURCE - } - input_arg { 
- name: "dense_1_matmul_readvariableop_resource" - type: DT_RESOURCE - } - input_arg { - name: "dense_1_biasadd_readvariableop_resource" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - } - node_def { - name: "dense/Cast" - op: "Cast" - input: "inputs" - attr { - key: "DstT" - value { - type: DT_FLOAT - } - } - attr { - key: "SrcT" - value { - type: DT_INT32 - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - experimental_debug_info { - original_node_names: "dense/Cast" - } - } - node_def { - name: "dense/MatMul/ReadVariableOp" - op: "ReadVariableOp" - input: "dense_matmul_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 214 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "dense/MatMul/ReadVariableOp" - } - } - node_def { - name: "dense/MatMul" - op: "MatMul" - input: "dense/Cast:y:0" - input: "dense/MatMul/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "dense/MatMul" - } - } - node_def { - name: "dense/BiasAdd/ReadVariableOp" - op: "ReadVariableOp" - input: "dense_biasadd_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "dense/BiasAdd/ReadVariableOp" - } - } - node_def { - name: "dense/BiasAdd" - op: "BiasAdd" - input: "dense/MatMul:product:0" - input: "dense/BiasAdd/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { 
- list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "dense/BiasAdd" - } - } - node_def { - name: "dense/Relu" - op: "Relu" - input: "dense/BiasAdd:output:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "dense/Relu" - } - } - node_def { - name: "dense_1/MatMul/ReadVariableOp" - op: "ReadVariableOp" - input: "dense_1_matmul_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "dense_1/MatMul/ReadVariableOp" - } - } - node_def { - name: "dense_1/MatMul" - op: "MatMul" - input: "dense/Relu:activations:0" - input: "dense_1/MatMul/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "dense_1/MatMul" - } - } - node_def { - name: "dense_1/BiasAdd/ReadVariableOp" - op: "ReadVariableOp" - input: "dense_1_biasadd_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "dense_1/BiasAdd/ReadVariableOp" - } - } - node_def { - name: "dense_1/BiasAdd" - op: "BiasAdd" - input: "dense_1/MatMul:product:0" - input: "dense_1/BiasAdd/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - 
experimental_debug_info { - original_node_names: "dense_1/BiasAdd" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "dense_1/BiasAdd:output:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "inputs" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_dense_layer_call_and_return_conditional_losses_6745" - input_arg { - name: "inputs" - type: DT_INT32 - } - input_arg { - name: "matmul_readvariableop_resource" - type: DT_RESOURCE - } - input_arg { - name: "biasadd_readvariableop_resource" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - } - node_def { - name: "Cast" - op: "Cast" - input: "inputs" - attr { - key: "DstT" - value { - type: DT_FLOAT - } - } - attr { - key: "SrcT" - value { - type: 
DT_INT32 - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Cast" - } - } - node_def { - name: "MatMul/ReadVariableOp" - op: "ReadVariableOp" - input: "matmul_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 214 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "MatMul/ReadVariableOp" - } - } - node_def { - name: "MatMul" - op: "MatMul" - input: "Cast:y:0" - input: "MatMul/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "MatMul" - } - } - node_def { - name: "BiasAdd/ReadVariableOp" - op: "ReadVariableOp" - input: "biasadd_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "BiasAdd/ReadVariableOp" - } - } - node_def { - name: "BiasAdd" - op: "BiasAdd" - input: "MatMul:product:0" - input: "BiasAdd/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "BiasAdd" - } - } - node_def { - name: "Relu" - op: "Relu" - input: "BiasAdd:output:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Relu" - } - } - node_def { - name: 
"Identity" - op: "Identity" - input: "Relu:activations:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "inputs" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_dense_1_layer_call_fn_6773" - input_arg { - name: "inputs" - type: DT_FLOAT - } - input_arg { - name: "unknown" - type: DT_RESOURCE - } - input_arg { - name: "unknown_0" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - control_output: "StatefulPartitionedCall" - } - node_def { - name: "StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "inputs" - input: "unknown" - input: "unknown_0" - attr { - key: "Tin" - value { - list { - type: DT_FLOAT - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - } - 
} - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570" - } - } - } - experimental_debug_info { - original_node_names: "StatefulPartitionedCall" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "StatefulPartitionedCall:output:0" - input: "^StatefulPartitionedCall" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - control_ret { - key: "StatefulPartitionedCall" - value: "StatefulPartitionedCall" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "inputs" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference__wrapped_model_6528" - input_arg { - name: "input_1" - type: DT_INT32 - } - input_arg { - name: "sequential_dense_matmul_readvariableop_resource" - type: DT_RESOURCE - } - input_arg { - name: "sequential_dense_biasadd_readvariableop_resource" - type: DT_RESOURCE - } - input_arg { - name: "sequential_dense_1_matmul_readvariableop_resource" - type: DT_RESOURCE - } - input_arg { - name: 
"sequential_dense_1_biasadd_readvariableop_resource" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - } - node_def { - name: "sequential/dense/Cast" - op: "Cast" - input: "input_1" - attr { - key: "DstT" - value { - type: DT_FLOAT - } - } - attr { - key: "SrcT" - value { - type: DT_INT32 - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - experimental_debug_info { - original_node_names: "sequential/dense/Cast" - } - } - node_def { - name: "sequential/dense/MatMul/ReadVariableOp" - op: "ReadVariableOp" - input: "sequential_dense_matmul_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 214 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "sequential/dense/MatMul/ReadVariableOp" - } - } - node_def { - name: "sequential/dense/MatMul" - op: "MatMul" - input: "sequential/dense/Cast:y:0" - input: "sequential/dense/MatMul/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "sequential/dense/MatMul" - } - } - node_def { - name: "sequential/dense/BiasAdd/ReadVariableOp" - op: "ReadVariableOp" - input: "sequential_dense_biasadd_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "sequential/dense/BiasAdd/ReadVariableOp" - } - } - node_def { - name: "sequential/dense/BiasAdd" - op: "BiasAdd" - input: "sequential/dense/MatMul:product:0" - input: "sequential/dense/BiasAdd/ReadVariableOp:value:0" - attr { - key: 
"T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "sequential/dense/BiasAdd" - } - } - node_def { - name: "sequential/dense/Relu" - op: "Relu" - input: "sequential/dense/BiasAdd:output:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "sequential/dense/Relu" - } - } - node_def { - name: "sequential/dense_1/MatMul/ReadVariableOp" - op: "ReadVariableOp" - input: "sequential_dense_1_matmul_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "sequential/dense_1/MatMul/ReadVariableOp" - } - } - node_def { - name: "sequential/dense_1/MatMul" - op: "MatMul" - input: "sequential/dense/Relu:activations:0" - input: "sequential/dense_1/MatMul/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "sequential/dense_1/MatMul" - } - } - node_def { - name: "sequential/dense_1/BiasAdd/ReadVariableOp" - op: "ReadVariableOp" - input: "sequential_dense_1_biasadd_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "sequential/dense_1/BiasAdd/ReadVariableOp" - } - } - node_def { - name: "sequential/dense_1/BiasAdd" - op: "BiasAdd" - input: 
"sequential/dense_1/MatMul:product:0" - input: "sequential/dense_1/BiasAdd/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "sequential/dense_1/BiasAdd" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "sequential/dense_1/BiasAdd:output:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "input_1" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_dense_layer_call_and_return_conditional_losses_6544" - input_arg { - name: "inputs" - type: DT_INT32 - } - input_arg { - name: "matmul_readvariableop_resource" - type: DT_RESOURCE - } - input_arg { - name: 
"biasadd_readvariableop_resource" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - } - node_def { - name: "Cast" - op: "Cast" - input: "inputs" - attr { - key: "DstT" - value { - type: DT_FLOAT - } - } - attr { - key: "SrcT" - value { - type: DT_INT32 - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Cast" - } - } - node_def { - name: "MatMul/ReadVariableOp" - op: "ReadVariableOp" - input: "matmul_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 214 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "MatMul/ReadVariableOp" - } - } - node_def { - name: "MatMul" - op: "MatMul" - input: "Cast:y:0" - input: "MatMul/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "MatMul" - } - } - node_def { - name: "BiasAdd/ReadVariableOp" - op: "ReadVariableOp" - input: "biasadd_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "BiasAdd/ReadVariableOp" - } - } - node_def { - name: "BiasAdd" - op: "BiasAdd" - input: "MatMul:product:0" - input: "BiasAdd/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "BiasAdd" - } - } - node_def { - name: "Relu" - op: "Relu" 
- input: "BiasAdd:output:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Relu" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "Relu:activations:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "inputs" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_sequential_layer_call_and_return_conditional_losses_6601" - input_arg { - name: "input_1" - type: DT_INT32 - } - input_arg { - name: "dense_6590" - type: DT_RESOURCE - } - input_arg { - name: "dense_6592" - type: DT_RESOURCE - } - input_arg { - name: "dense_1_6595" - type: DT_RESOURCE - } - input_arg { - name: "dense_1_6597" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - control_output: "dense/StatefulPartitionedCall" - control_output: "dense_1/StatefulPartitionedCall" - } - node_def { - name: "dense/StatefulPartitionedCall" - op: "StatefulPartitionedCall" - 
input: "input_1" - input: "dense_6590" - input: "dense_6592" - attr { - key: "Tin" - value { - list { - type: DT_INT32 - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_dense_layer_call_and_return_conditional_losses_6544" - } - } - } - experimental_debug_info { - original_node_names: "dense/StatefulPartitionedCall" - } - } - node_def { - name: "dense_1/StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "dense/StatefulPartitionedCall:output:0" - input: "dense_1_6595" - input: "dense_1_6597" - attr { - key: "Tin" - value { - list { - type: DT_FLOAT - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570" - } - } - } - experimental_debug_info { - original_node_names: "dense_1/StatefulPartitionedCall" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "dense_1/StatefulPartitionedCall:output:0" - input: "^dense/StatefulPartitionedCall" - 
input: "^dense_1/StatefulPartitionedCall" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - control_ret { - key: "dense/StatefulPartitionedCall" - value: "dense/StatefulPartitionedCall" - } - control_ret { - key: "dense_1/StatefulPartitionedCall" - value: "dense_1/StatefulPartitionedCall" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "input_1" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_sequential_layer_call_fn_6733" - input_arg { - name: "inputs" - type: DT_INT32 - } - input_arg { - name: "unknown" - type: DT_RESOURCE - } - input_arg { - name: "unknown_0" - type: DT_RESOURCE - } - input_arg { - name: "unknown_1" - type: DT_RESOURCE - } - input_arg { - name: "unknown_2" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - control_output: 
"StatefulPartitionedCall" - } - node_def { - name: "StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "inputs" - input: "unknown" - input: "unknown_0" - input: "unknown_1" - input: "unknown_2" - attr { - key: "Tin" - value { - list { - type: DT_INT32 - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - i: 3 - i: 4 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_sequential_layer_call_and_return_conditional_losses_6645" - } - } - } - experimental_debug_info { - original_node_names: "StatefulPartitionedCall" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "StatefulPartitionedCall:output:0" - input: "^StatefulPartitionedCall" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - control_ret { - key: "StatefulPartitionedCall" - value: "StatefulPartitionedCall" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } 
- } - } - attr { - key: "_user_specified_name" - value { - s: "inputs" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_sequential_layer_call_and_return_conditional_losses_6645" - input_arg { - name: "inputs" - type: DT_INT32 - } - input_arg { - name: "dense_6634" - type: DT_RESOURCE - } - input_arg { - name: "dense_6636" - type: DT_RESOURCE - } - input_arg { - name: "dense_1_6639" - type: DT_RESOURCE - } - input_arg { - name: "dense_1_6641" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - control_output: "dense/StatefulPartitionedCall" - control_output: "dense_1/StatefulPartitionedCall" - } - node_def { - name: "dense/StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "inputs" - input: "dense_6634" - input: "dense_6636" - attr { - key: "Tin" - value { - list { - type: DT_INT32 - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_dense_layer_call_and_return_conditional_losses_6544" - } - } - 
} - experimental_debug_info { - original_node_names: "dense/StatefulPartitionedCall" - } - } - node_def { - name: "dense_1/StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "dense/StatefulPartitionedCall:output:0" - input: "dense_1_6639" - input: "dense_1_6641" - attr { - key: "Tin" - value { - list { - type: DT_FLOAT - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570" - } - } - } - experimental_debug_info { - original_node_names: "dense_1/StatefulPartitionedCall" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "dense_1/StatefulPartitionedCall:output:0" - input: "^dense/StatefulPartitionedCall" - input: "^dense_1/StatefulPartitionedCall" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - control_ret { - key: "dense/StatefulPartitionedCall" - value: "dense/StatefulPartitionedCall" - } - control_ret { - key: "dense_1/StatefulPartitionedCall" - 
value: "dense_1/StatefulPartitionedCall" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "inputs" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570" - input_arg { - name: "inputs" - type: DT_FLOAT - } - input_arg { - name: "matmul_readvariableop_resource" - type: DT_RESOURCE - } - input_arg { - name: "biasadd_readvariableop_resource" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - } - node_def { - name: "MatMul/ReadVariableOp" - op: "ReadVariableOp" - input: "matmul_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "MatMul/ReadVariableOp" - } - } - node_def { - name: "MatMul" - op: "MatMul" - input: "inputs" - input: "MatMul/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "MatMul" - } - } - node_def { - name: "BiasAdd/ReadVariableOp" - op: "ReadVariableOp" - input: "biasadd_readvariableop_resource" - attr { - 
key: "_output_shapes" - value { - list { - shape { - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "BiasAdd/ReadVariableOp" - } - } - node_def { - name: "BiasAdd" - op: "BiasAdd" - input: "MatMul:product:0" - input: "BiasAdd/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "BiasAdd" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "BiasAdd:output:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "inputs" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_signature_wrapper_6671" - input_arg { - name: "input_1" - type: DT_INT32 - } - input_arg { - name: "unknown" - type: DT_RESOURCE - } - input_arg { - name: "unknown_0" - type: DT_RESOURCE - } - input_arg { - name: "unknown_1" - type: DT_RESOURCE - } - input_arg { - name: "unknown_2" - type: 
DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - control_output: "StatefulPartitionedCall" - } - node_def { - name: "StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "input_1" - input: "unknown" - input: "unknown_0" - input: "unknown_1" - input: "unknown_2" - attr { - key: "Tin" - value { - list { - type: DT_INT32 - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - i: 3 - i: 4 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: "__inference__wrapped_model_6528" - } - } - } - experimental_debug_info { - original_node_names: "StatefulPartitionedCall" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "StatefulPartitionedCall:output:0" - input: "^StatefulPartitionedCall" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - control_ret { - key: "StatefulPartitionedCall" - value: "StatefulPartitionedCall" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { 
- list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "input_1" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_sequential_layer_call_fn_6720" - input_arg { - name: "inputs" - type: DT_INT32 - } - input_arg { - name: "unknown" - type: DT_RESOURCE - } - input_arg { - name: "unknown_0" - type: DT_RESOURCE - } - input_arg { - name: "unknown_1" - type: DT_RESOURCE - } - input_arg { - name: "unknown_2" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - control_output: "StatefulPartitionedCall" - } - node_def { - name: "StatefulPartitionedCall" - op: "StatefulPartitionedCall" - input: "inputs" - input: "unknown" - input: "unknown_0" - input: "unknown_1" - input: "unknown_2" - attr { - key: "Tin" - value { - list { - type: DT_INT32 - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - type: DT_RESOURCE - } - } - } - attr { - key: "Tout" - value { - list { - type: DT_FLOAT - } - } - } - attr { - key: "_collective_manager_ids" - value { - list { - } - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "_read_only_resource_inputs" - value { - list { - i: 1 - i: 2 - i: 3 - i: 4 - } - } - } - attr { - key: "config_proto" - value { - s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" - } - } - attr { - key: "f" - value { - func { - name: 
"__inference_sequential_layer_call_and_return_conditional_losses_6618" - } - } - } - experimental_debug_info { - original_node_names: "StatefulPartitionedCall" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "StatefulPartitionedCall:output:0" - input: "^StatefulPartitionedCall" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - control_ret { - key: "StatefulPartitionedCall" - value: "StatefulPartitionedCall" - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "inputs" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - function { - signature { - name: "__inference_sequential_layer_call_and_return_conditional_losses_6707" - input_arg { - name: "inputs" - type: DT_INT32 - } - input_arg { - name: "dense_matmul_readvariableop_resource" - type: DT_RESOURCE - } - input_arg { - name: "dense_biasadd_readvariableop_resource" - type: DT_RESOURCE - } - 
input_arg { - name: "dense_1_matmul_readvariableop_resource" - type: DT_RESOURCE - } - input_arg { - name: "dense_1_biasadd_readvariableop_resource" - type: DT_RESOURCE - } - output_arg { - name: "identity" - type: DT_FLOAT - } - is_stateful: true - } - node_def { - name: "dense/Cast" - op: "Cast" - input: "inputs" - attr { - key: "DstT" - value { - type: DT_FLOAT - } - } - attr { - key: "SrcT" - value { - type: DT_INT32 - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - experimental_debug_info { - original_node_names: "dense/Cast" - } - } - node_def { - name: "dense/MatMul/ReadVariableOp" - op: "ReadVariableOp" - input: "dense_matmul_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 214 - } - dim { - size: 100 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "dense/MatMul/ReadVariableOp" - } - } - node_def { - name: "dense/MatMul" - op: "MatMul" - input: "dense/Cast:y:0" - input: "dense/MatMul/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "dense/MatMul" - } - } - node_def { - name: "dense/BiasAdd/ReadVariableOp" - op: "ReadVariableOp" - input: "dense_biasadd_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "dense/BiasAdd/ReadVariableOp" - } - } - node_def { - name: "dense/BiasAdd" - op: "BiasAdd" - input: "dense/MatMul:product:0" - input: "dense/BiasAdd/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: 
"_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "dense/BiasAdd" - } - } - node_def { - name: "dense/Relu" - op: "Relu" - input: "dense/BiasAdd:output:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - } - } - } - experimental_debug_info { - original_node_names: "dense/Relu" - } - } - node_def { - name: "dense_1/MatMul/ReadVariableOp" - op: "ReadVariableOp" - input: "dense_1_matmul_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 100 - } - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "dense_1/MatMul/ReadVariableOp" - } - } - node_def { - name: "dense_1/MatMul" - op: "MatMul" - input: "dense/Relu:activations:0" - input: "dense_1/MatMul/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "dense_1/MatMul" - } - } - node_def { - name: "dense_1/BiasAdd/ReadVariableOp" - op: "ReadVariableOp" - input: "dense_1_biasadd_readvariableop_resource" - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: 1 - } - } - } - } - } - attr { - key: "dtype" - value { - type: DT_FLOAT - } - } - experimental_debug_info { - original_node_names: "dense_1/BiasAdd/ReadVariableOp" - } - } - node_def { - name: "dense_1/BiasAdd" - op: "BiasAdd" - input: "dense_1/MatMul:product:0" - input: "dense_1/BiasAdd/ReadVariableOp:value:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } 
- } - } - experimental_debug_info { - original_node_names: "dense_1/BiasAdd" - } - } - node_def { - name: "Identity" - op: "Identity" - input: "dense_1/BiasAdd:output:0" - attr { - key: "T" - value { - type: DT_FLOAT - } - } - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - } - experimental_debug_info { - original_node_names: "Identity" - } - } - ret { - key: "identity" - value: "Identity:output:0" - } - attr { - key: "_input_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - shape { - unknown_rank: true - } - } - } - } - arg_attr { - key: 0 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - } - attr { - key: "_user_specified_name" - value { - s: "inputs" - } - } - } - } - arg_attr { - key: 1 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 2 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 3 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - arg_attr { - key: 4 - value { - attr { - key: "_output_shapes" - value { - list { - shape { - } - } - } - } - } - } - } - } - versions { - producer: 331 - min_consumer: 12 - } - } - saver_def { - filename_tensor_name: "saver_filename:0" - save_tensor_name: "StatefulPartitionedCall_1:0" - restore_op_name: "StatefulPartitionedCall_2" - version: V2 - } - collection_def { - key: "saved_model_main_op" - value { - node_list { - value: "NoOp" - } - } - } - signature_def { - key: "__saved_model_init_op" - value { - outputs { - key: "__saved_model_init_op" - value { - name: "NoOp" - tensor_shape { - unknown_rank: true - } - } - } - } - } - signature_def { - key: 
"serving_default" - value { - inputs { - key: "input_1" - value { - name: "serving_default_input_1:0" - dtype: DT_INT32 - tensor_shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - } - } - outputs { - key: "output_1" - value { - name: "StatefulPartitionedCall:0" - dtype: DT_FLOAT - tensor_shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - } - } - method_name: "tensorflow/serving/predict" - } - } - object_graph_def { - nodes { - children { - node_id: 1 - local_name: "layer_with_weights-0" - } - children { - node_id: 1 - local_name: "layer-0" - } - children { - node_id: 2 - local_name: "layer_with_weights-1" - } - children { - node_id: 2 - local_name: "layer-1" - } - children { - node_id: 3 - local_name: "optimizer" - } - children { - node_id: 4 - local_name: "regularization_losses" - } - children { - node_id: 5 - local_name: "variables" - } - children { - node_id: 6 - local_name: "trainable_variables" - } - children { - node_id: 7 - local_name: "keras_api" - } - children { - node_id: 8 - local_name: "signatures" - } - children { - node_id: 47 - local_name: "__call__" - } - children { - node_id: 48 - local_name: "_default_save_signature" - } - children { - node_id: 49 - local_name: "call_and_return_all_conditional_losses" - } - user_object { - identifier: "_tf_keras_sequential" - version { - producer: 1 - min_consumer: 1 - } - metadata: "{\"class_name\": \"Sequential\", \"name\": \"sequential\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"config\": {\"name\": \"sequential\", \"layers\": [{\"class_name\": \"Dense\", \"config\": {\"name\": \"dense\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 100, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, 
\"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}}, {\"class_name\": \"Dense\", \"config\": {\"name\": \"dense_1\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 1, \"activation\": \"linear\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}}], \"build_input_shape\": {\"class_name\": \"__tuple__\", \"items\": [null, 214]}}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 214}}}, \"build_input_shape\": {\"class_name\": \"__tuple__\", \"items\": [null, 214]}, \"is_graph_network\": false, \"keras_version\": \"2.2.4-tf\", \"backend\": \"tensorflow\", \"model_config\": {\"class_name\": \"Sequential\", \"config\": {\"name\": \"sequential\", \"layers\": [{\"class_name\": \"Dense\", \"config\": {\"name\": \"dense\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 100, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}}, {\"class_name\": \"Dense\", \"config\": {\"name\": \"dense_1\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 1, \"activation\": \"linear\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, 
\"kernel_constraint\": null, \"bias_constraint\": null}}], \"build_input_shape\": {\"class_name\": \"__tuple__\", \"items\": [null, 214]}}}, \"training_config\": {\"loss\": \"mean_absolute_error\", \"metrics\": [\"mean_squared_error\"], \"weighted_metrics\": null, \"loss_weights\": null, \"sample_weight_mode\": null, \"optimizer_config\": {\"class_name\": \"Adam\", \"config\": {\"name\": \"Adam\", \"learning_rate\": 0.0003000000142492354, \"decay\": 0.0, \"beta_1\": 0.8999999761581421, \"beta_2\": 0.9990000128746033, \"epsilon\": 1e-07, \"amsgrad\": false}}}}" - } - } - nodes { - children { - node_id: 9 - local_name: "kernel" - } - children { - node_id: 10 - local_name: "bias" - } - children { - node_id: 11 - local_name: "regularization_losses" - } - children { - node_id: 12 - local_name: "variables" - } - children { - node_id: 13 - local_name: "trainable_variables" - } - children { - node_id: 14 - local_name: "keras_api" - } - children { - node_id: 50 - local_name: "__call__" - } - children { - node_id: 51 - local_name: "call_and_return_all_conditional_losses" - } - user_object { - identifier: "_tf_keras_layer" - version { - producer: 1 - min_consumer: 1 - } - metadata: "{\"class_name\": \"Dense\", \"name\": \"dense\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 100, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 214}}}, 
\"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [null, 214]}}" - } - } - nodes { - children { - node_id: 15 - local_name: "kernel" - } - children { - node_id: 16 - local_name: "bias" - } - children { - node_id: 17 - local_name: "regularization_losses" - } - children { - node_id: 18 - local_name: "variables" - } - children { - node_id: 19 - local_name: "trainable_variables" - } - children { - node_id: 20 - local_name: "keras_api" - } - children { - node_id: 52 - local_name: "__call__" - } - children { - node_id: 53 - local_name: "call_and_return_all_conditional_losses" - } - user_object { - identifier: "_tf_keras_layer" - version { - producer: 1 - min_consumer: 1 - } - metadata: "{\"class_name\": \"Dense\", \"name\": \"dense_1\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense_1\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 1, \"activation\": \"linear\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 100}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [null, 100]}}" - } - } - nodes { - user_object { - identifier: "optimizer" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 9 - local_name: "0" - } - children { - node_id: 10 - local_name: "1" - } - children { - node_id: 15 - local_name: "2" - } - children { 
- node_id: 16 - local_name: "3" - } - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 9 - local_name: "0" - } - children { - node_id: 10 - local_name: "1" - } - children { - node_id: 15 - local_name: "2" - } - children { - node_id: 16 - local_name: "3" - } - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 21 - local_name: "layers" - } - children { - node_id: 22 - local_name: "layer_regularization_losses" - } - children { - node_id: 23 - local_name: "non_trainable_variables" - } - children { - node_id: 24 - local_name: "layer_metrics" - } - children { - node_id: 4 - local_name: "regularization_losses" - } - children { - node_id: 25 - local_name: "metrics" - } - children { - node_id: 5 - local_name: "variables" - } - children { - node_id: 6 - local_name: "trainable_variables" - } - children { - node_id: 47 - local_name: "__call__" - } - children { - node_id: 48 - local_name: "_default_save_signature" - } - children { - node_id: 49 - local_name: "call_and_return_all_conditional_losses" - } - children { - node_id: 49 - local_name: "call_and_return_conditional_losses" - } - user_object { - identifier: "_generic_user_object" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 54 - local_name: "serving_default" - } - user_object { - identifier: "signature_map" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - variable { - dtype: DT_FLOAT - shape { - dim { - size: 214 - } - dim { - size: 100 - } - } - trainable: true - name: "dense/kernel" - } - } - nodes { - variable { - dtype: DT_FLOAT - shape { - dim { - size: 100 - } - } - trainable: true - name: "dense/bias" - } - } - nodes { - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 9 - 
local_name: "0" - } - children { - node_id: 10 - local_name: "1" - } - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 9 - local_name: "0" - } - children { - node_id: 10 - local_name: "1" - } - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 26 - local_name: "layers" - } - children { - node_id: 27 - local_name: "layer_regularization_losses" - } - children { - node_id: 28 - local_name: "non_trainable_variables" - } - children { - node_id: 29 - local_name: "layer_metrics" - } - children { - node_id: 11 - local_name: "regularization_losses" - } - children { - node_id: 30 - local_name: "metrics" - } - children { - node_id: 12 - local_name: "variables" - } - children { - node_id: 13 - local_name: "trainable_variables" - } - children { - node_id: 50 - local_name: "__call__" - } - children { - node_id: 51 - local_name: "call_and_return_all_conditional_losses" - } - children { - node_id: 51 - local_name: "call_and_return_conditional_losses" - } - user_object { - identifier: "_generic_user_object" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - variable { - dtype: DT_FLOAT - shape { - dim { - size: 100 - } - dim { - size: 1 - } - } - trainable: true - name: "dense_1/kernel" - } - } - nodes { - variable { - dtype: DT_FLOAT - shape { - dim { - size: 1 - } - } - trainable: true - name: "dense_1/bias" - } - } - nodes { - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 15 - local_name: "0" - } - children { - node_id: 16 - local_name: "1" - } - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 15 - local_name: "0" - } - children { - node_id: 16 - local_name: "1" - } - user_object { - 
identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 31 - local_name: "layers" - } - children { - node_id: 32 - local_name: "layer_regularization_losses" - } - children { - node_id: 33 - local_name: "non_trainable_variables" - } - children { - node_id: 34 - local_name: "layer_metrics" - } - children { - node_id: 17 - local_name: "regularization_losses" - } - children { - node_id: 35 - local_name: "metrics" - } - children { - node_id: 18 - local_name: "variables" - } - children { - node_id: 19 - local_name: "trainable_variables" - } - children { - node_id: 52 - local_name: "__call__" - } - children { - node_id: 53 - local_name: "call_and_return_all_conditional_losses" - } - children { - node_id: 53 - local_name: "call_and_return_conditional_losses" - } - user_object { - identifier: "_generic_user_object" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 1 - local_name: "0" - } - children { - node_id: 2 - local_name: "1" - } - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_dict_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 36 - local_name: "0" - } - children { - node_id: 37 - local_name: "1" - } - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - 
user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_dict_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_dict_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 38 - local_name: "total" - } - children { - node_id: 39 - local_name: "count" - } - children { - node_id: 40 - local_name: "variables" - } - children { - node_id: 41 - local_name: "keras_api" - } - user_object { - identifier: "_tf_keras_metric" - version { - producer: 1 - min_consumer: 1 - } - metadata: "{\"class_name\": \"Mean\", \"name\": \"loss\", \"dtype\": \"float32\", \"config\": {\"name\": \"loss\", \"dtype\": \"float32\"}}" - } - } - nodes { - children { - node_id: 42 - local_name: "total" - } - children { - node_id: 43 - local_name: "count" - } - children { - node_id: 44 - local_name: "_fn_kwargs" - } - children { - node_id: 45 - local_name: "variables" - } - children { - node_id: 46 - local_name: "keras_api" - } - user_object { - identifier: "_tf_keras_metric" - version { - producer: 1 - min_consumer: 1 - } - metadata: "{\"class_name\": \"MeanMetricWrapper\", \"name\": \"mean_squared_error\", \"dtype\": \"float32\", \"config\": {\"name\": \"mean_squared_error\", \"dtype\": 
\"float32\", \"fn\": \"mean_squared_error\"}}" - } - } - nodes { - variable { - dtype: DT_FLOAT - shape { - } - synchronization: VARIABLE_SYNCHRONIZATION_ON_READ - aggregation: VARIABLE_AGGREGATION_SUM - name: "total" - } - } - nodes { - variable { - dtype: DT_FLOAT - shape { - } - synchronization: VARIABLE_SYNCHRONIZATION_ON_READ - aggregation: VARIABLE_AGGREGATION_SUM - name: "count" - } - } - nodes { - children { - node_id: 38 - local_name: "0" - } - children { - node_id: 39 - local_name: "1" - } - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 40 - local_name: "variables" - } - user_object { - identifier: "_generic_user_object" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - variable { - dtype: DT_FLOAT - shape { - } - synchronization: VARIABLE_SYNCHRONIZATION_ON_READ - aggregation: VARIABLE_AGGREGATION_SUM - name: "total" - } - } - nodes { - variable { - dtype: DT_FLOAT - shape { - } - synchronization: VARIABLE_SYNCHRONIZATION_ON_READ - aggregation: VARIABLE_AGGREGATION_SUM - name: "count" - } - } - nodes { - user_object { - identifier: "trackable_dict_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 42 - local_name: "0" - } - children { - node_id: 43 - local_name: "1" - } - user_object { - identifier: "trackable_list_wrapper" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - children { - node_id: 45 - local_name: "variables" - } - user_object { - identifier: "_generic_user_object" - version { - producer: 1 - min_consumer: 1 - } - } - } - nodes { - function { - concrete_functions: "__inference_sequential_layer_call_fn_6629" - concrete_functions: "__inference_sequential_layer_call_fn_6733" - concrete_functions: "__inference_sequential_layer_call_fn_6720" - concrete_functions: "__inference_sequential_layer_call_fn_6656" - function_spec { - fullargspec { - named_tuple_value { - 
name: "FullArgSpec" - values { - key: "args" - value { - list_value { - values { - string_value: "self" - } - values { - string_value: "inputs" - } - values { - string_value: "training" - } - values { - string_value: "mask" - } - } - } - } - values { - key: "varargs" - value { - none_value { - } - } - } - values { - key: "varkw" - value { - none_value { - } - } - } - values { - key: "defaults" - value { - list_value { - values { - bool_value: false - } - values { - none_value { - } - } - } - } - } - values { - key: "kwonlyargs" - value { - list_value { - } - } - } - values { - key: "kwonlydefaults" - value { - dict_value { - } - } - } - values { - key: "annotations" - value { - dict_value { - } - } - } - } - } - is_method: true - input_signature { - none_value { - } - } - } - } - } - nodes { - function { - concrete_functions: "__inference__wrapped_model_6528" - function_spec { - fullargspec { - named_tuple_value { - name: "FullArgSpec" - values { - key: "args" - value { - list_value { - } - } - } - values { - key: "varargs" - value { - string_value: "args" - } - } - values { - key: "varkw" - value { - none_value { - } - } - } - values { - key: "defaults" - value { - none_value { - } - } - } - values { - key: "kwonlyargs" - value { - list_value { - } - } - } - values { - key: "kwonlydefaults" - value { - none_value { - } - } - } - values { - key: "annotations" - value { - dict_value { - } - } - } - } - } - input_signature { - tuple_value { - values { - tensor_spec_value { - name: "input_1" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - } - } - } - } - } - nodes { - function { - concrete_functions: "__inference_sequential_layer_call_and_return_conditional_losses_6689" - concrete_functions: "__inference_sequential_layer_call_and_return_conditional_losses_6587" - concrete_functions: "__inference_sequential_layer_call_and_return_conditional_losses_6707" - concrete_functions: 
"__inference_sequential_layer_call_and_return_conditional_losses_6601" - function_spec { - fullargspec { - named_tuple_value { - name: "FullArgSpec" - values { - key: "args" - value { - list_value { - values { - string_value: "self" - } - values { - string_value: "inputs" - } - values { - string_value: "training" - } - values { - string_value: "mask" - } - } - } - } - values { - key: "varargs" - value { - none_value { - } - } - } - values { - key: "varkw" - value { - none_value { - } - } - } - values { - key: "defaults" - value { - list_value { - values { - bool_value: false - } - values { - none_value { - } - } - } - } - } - values { - key: "kwonlyargs" - value { - list_value { - } - } - } - values { - key: "kwonlydefaults" - value { - dict_value { - } - } - } - values { - key: "annotations" - value { - dict_value { - } - } - } - } - } - is_method: true - input_signature { - none_value { - } - } - } - } - } - nodes { - function { - concrete_functions: "__inference_dense_layer_call_fn_6754" - function_spec { - fullargspec { - named_tuple_value { - name: "FullArgSpec" - values { - key: "args" - value { - list_value { - values { - string_value: "self" - } - values { - string_value: "inputs" - } - } - } - } - values { - key: "varargs" - value { - none_value { - } - } - } - values { - key: "varkw" - value { - none_value { - } - } - } - values { - key: "defaults" - value { - none_value { - } - } - } - values { - key: "kwonlyargs" - value { - list_value { - } - } - } - values { - key: "kwonlydefaults" - value { - none_value { - } - } - } - values { - key: "annotations" - value { - dict_value { - } - } - } - } - } - is_method: true - input_signature { - none_value { - } - } - } - } - } - nodes { - function { - concrete_functions: "__inference_dense_layer_call_and_return_conditional_losses_6745" - function_spec { - fullargspec { - named_tuple_value { - name: "FullArgSpec" - values { - key: "args" - value { - list_value { - values { - string_value: "self" - } - values { - 
string_value: "inputs" - } - } - } - } - values { - key: "varargs" - value { - none_value { - } - } - } - values { - key: "varkw" - value { - none_value { - } - } - } - values { - key: "defaults" - value { - none_value { - } - } - } - values { - key: "kwonlyargs" - value { - list_value { - } - } - } - values { - key: "kwonlydefaults" - value { - none_value { - } - } - } - values { - key: "annotations" - value { - dict_value { - } - } - } - } - } - is_method: true - input_signature { - none_value { - } - } - } - } - } - nodes { - function { - concrete_functions: "__inference_dense_1_layer_call_fn_6773" - function_spec { - fullargspec { - named_tuple_value { - name: "FullArgSpec" - values { - key: "args" - value { - list_value { - values { - string_value: "self" - } - values { - string_value: "inputs" - } - } - } - } - values { - key: "varargs" - value { - none_value { - } - } - } - values { - key: "varkw" - value { - none_value { - } - } - } - values { - key: "defaults" - value { - none_value { - } - } - } - values { - key: "kwonlyargs" - value { - list_value { - } - } - } - values { - key: "kwonlydefaults" - value { - none_value { - } - } - } - values { - key: "annotations" - value { - dict_value { - } - } - } - } - } - is_method: true - input_signature { - none_value { - } - } - } - } - } - nodes { - function { - concrete_functions: "__inference_dense_1_layer_call_and_return_conditional_losses_6764" - function_spec { - fullargspec { - named_tuple_value { - name: "FullArgSpec" - values { - key: "args" - value { - list_value { - values { - string_value: "self" - } - values { - string_value: "inputs" - } - } - } - } - values { - key: "varargs" - value { - none_value { - } - } - } - values { - key: "varkw" - value { - none_value { - } - } - } - values { - key: "defaults" - value { - none_value { - } - } - } - values { - key: "kwonlyargs" - value { - list_value { - } - } - } - values { - key: "kwonlydefaults" - value { - none_value { - } - } - } - values { - key: 
"annotations" - value { - dict_value { - } - } - } - } - } - is_method: true - input_signature { - none_value { - } - } - } - } - } - nodes { - bare_concrete_function { - concrete_function_name: "__inference_signature_wrapper_6671" - argument_keywords: "input_1" - allowed_positional_arguments: 1 - } - } - concrete_functions { - key: "__inference__wrapped_model_6528" - value { - bound_inputs: 9 - bound_inputs: 10 - bound_inputs: 15 - bound_inputs: 16 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "input_1" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - dict_value { - fields { - key: "output_1" - value { - tensor_spec_value { - name: "output_1" - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - dtype: DT_FLOAT - } - } - } - } - } - } - } - concrete_functions { - key: "__inference_dense_1_layer_call_and_return_conditional_losses_6764" - value { - bound_inputs: 15 - bound_inputs: 16 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "inputs" - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - dtype: DT_FLOAT - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - tuple_value { - values { - tensor_spec_value { - name: "0" - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - dtype: DT_FLOAT - } - } - values { - list_value { - } - } - } - } - } - } - concrete_functions { - key: "__inference_dense_1_layer_call_fn_6773" - value { - bound_inputs: 15 - bound_inputs: 16 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "inputs" - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - dtype: DT_FLOAT - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - tensor_spec_value { - 
shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - dtype: DT_FLOAT - } - } - } - } - concrete_functions { - key: "__inference_dense_layer_call_and_return_conditional_losses_6745" - value { - bound_inputs: 9 - bound_inputs: 10 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "inputs" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - tuple_value { - values { - tensor_spec_value { - name: "0" - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - dtype: DT_FLOAT - } - } - values { - list_value { - } - } - } - } - } - } - concrete_functions { - key: "__inference_dense_layer_call_fn_6754" - value { - bound_inputs: 9 - bound_inputs: 10 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "inputs" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - tensor_spec_value { - shape { - dim { - size: -1 - } - dim { - size: 100 - } - } - dtype: DT_FLOAT - } - } - } - } - concrete_functions { - key: "__inference_sequential_layer_call_and_return_conditional_losses_6587" - value { - bound_inputs: 9 - bound_inputs: 10 - bound_inputs: 15 - bound_inputs: 16 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "input_1" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - values { - bool_value: true - } - values { - none_value { - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - tuple_value { - values { - tensor_spec_value { - name: "0" - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - dtype: DT_FLOAT - } - } - values { - list_value { - } - } - } - } - } - } - concrete_functions { - 
key: "__inference_sequential_layer_call_and_return_conditional_losses_6601" - value { - bound_inputs: 9 - bound_inputs: 10 - bound_inputs: 15 - bound_inputs: 16 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "input_1" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - values { - bool_value: false - } - values { - none_value { - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - tuple_value { - values { - tensor_spec_value { - name: "0" - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - dtype: DT_FLOAT - } - } - values { - list_value { - } - } - } - } - } - } - concrete_functions { - key: "__inference_sequential_layer_call_and_return_conditional_losses_6689" - value { - bound_inputs: 9 - bound_inputs: 10 - bound_inputs: 15 - bound_inputs: 16 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "inputs" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - values { - bool_value: true - } - values { - none_value { - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - tuple_value { - values { - tensor_spec_value { - name: "0" - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - dtype: DT_FLOAT - } - } - values { - list_value { - } - } - } - } - } - } - concrete_functions { - key: "__inference_sequential_layer_call_and_return_conditional_losses_6707" - value { - bound_inputs: 9 - bound_inputs: 10 - bound_inputs: 15 - bound_inputs: 16 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "inputs" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - values { - bool_value: false - } - values { - none_value { - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - 
tuple_value { - values { - tensor_spec_value { - name: "0" - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - dtype: DT_FLOAT - } - } - values { - list_value { - } - } - } - } - } - } - concrete_functions { - key: "__inference_sequential_layer_call_fn_6629" - value { - bound_inputs: 9 - bound_inputs: 10 - bound_inputs: 15 - bound_inputs: 16 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "input_1" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - values { - bool_value: true - } - values { - none_value { - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - tensor_spec_value { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - dtype: DT_FLOAT - } - } - } - } - concrete_functions { - key: "__inference_sequential_layer_call_fn_6656" - value { - bound_inputs: 9 - bound_inputs: 10 - bound_inputs: 15 - bound_inputs: 16 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "input_1" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - values { - bool_value: false - } - values { - none_value { - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - tensor_spec_value { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - dtype: DT_FLOAT - } - } - } - } - concrete_functions { - key: "__inference_sequential_layer_call_fn_6720" - value { - bound_inputs: 9 - bound_inputs: 10 - bound_inputs: 15 - bound_inputs: 16 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "inputs" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - values { - bool_value: true - } - values { - none_value { - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - tensor_spec_value { - shape { - 
dim { - size: -1 - } - dim { - size: 1 - } - } - dtype: DT_FLOAT - } - } - } - } - concrete_functions { - key: "__inference_sequential_layer_call_fn_6733" - value { - bound_inputs: 9 - bound_inputs: 10 - bound_inputs: 15 - bound_inputs: 16 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - values { - tensor_spec_value { - name: "inputs" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - values { - bool_value: false - } - values { - none_value { - } - } - } - } - values { - dict_value { - } - } - } - } - output_signature { - tensor_spec_value { - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - dtype: DT_FLOAT - } - } - } - } - concrete_functions { - key: "__inference_signature_wrapper_6671" - value { - bound_inputs: 9 - bound_inputs: 10 - bound_inputs: 15 - bound_inputs: 16 - canonicalized_input_signature { - tuple_value { - values { - tuple_value { - } - } - values { - dict_value { - fields { - key: "input_1" - value { - tensor_spec_value { - name: "input_1" - shape { - dim { - size: -1 - } - dim { - size: 214 - } - } - dtype: DT_INT32 - } - } - } - } - } - } - } - output_signature { - dict_value { - fields { - key: "output_1" - value { - tensor_spec_value { - name: "output_1" - shape { - dim { - size: -1 - } - dim { - size: 1 - } - } - dtype: DT_FLOAT - } - } - } - } - } - } - } - } -} - diff --git a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001 b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001 deleted file mode 100644 index 98807d26ee9f40e99330ae6a5d2988c640a320ec..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 88424 zcmWh!c{mqe7bi;yKWmgiMMaWI3*R|s5Q-F$5-o~KCA5f&60#E#rIPGRmV_*G??@_@ zN?B5tR4VOSNZMZSpYzN-&)m89nRCu(tKOK$y|sxzTm1;s6$-;cXFh^VV=}hORHKaO zW2m$^j(RsVm;%=@45{3PJEC0BeR}}jJTxBT&a8&=Wj5<^gaG6^Y1MOO#*vo;mRK 
zGVU4kmf!TQm54oH@%Xo76upp*vtq;X!Vbc{?(jpGm)}7=Z0i)iY{kn zk~N>@Xxp1{n7_>vGj;c%=a^Dl_eTpd#ytfyxi#n`X-GcKyNL1kmJ(MDca*tki>Yf5 z;>)eNsB+dBb3>mH*Z2ZlC>xK}@BYJwcMS1qxf4c3X5iili?F*d6q)KsL7;pj$#OE` ze!iJNMCt|Td{-KsTzr_Jeq*9`(~^qhUnSw`7YQquS(_4~2$Q{zk(N~+*padi=h@hz zz5ixrIQTZ%X{Ceam&>uoAsKUy=HSaY+4%j7DveFCL79LkTzW*5TYAKUvk`T`RU-4S zdP5xc?9IVL!R^F6u#iNwU&e8Ndby2(TJ(^HH~!9V<-RG3p__~$K983p|H&CJb9d`8 zZLwo($4Nbd&q^^Q{?bdZ6ljo&ABLbUri7>G>>;(v7a?zpG_^Nsfk~<7NwCW#Iy6fi zuZ`UbBQe@oVzGw3azTN6T-*SWru{H>%_GQ)Jql4N9>lOj2EV%n!@PVUENkzC>R0kO z^_Cij=j1}+ld}+EXNg%FcR)W^9S^z0FfPfrxgPTr2)#cc;fAar#Yp4*GP(m5mwp^alNh!V9ok|`0e%8mH9!TUAjaj52MKPqgFY!2Xg+%orq9(y`P4;% zfF?bf8gZM9xwQ!Y+47XLu;<8>Ug&oYhQ(jk<1R5H=3LPU=AgVfaz1y-M?W_~&;51q z)LnyLl=cNQQW}`n25F`$Qj__c@{oEgZ({1Eh~d2gag=d1CAG6}aC}}W*;T`GA9viW z74f%4g;opXPyQgsho-{X$vUh=n>lQAt>oX(=6f>HsQQ+1viRk7eQp;vFW@$n`<0|=x@h%R7 zz*9HC(Q*k$;7nY(*p&DUWl$&m3(THyQQEzWt0k+pgPFb#A0^@hp7}C3!(k!0n{3U(Xm~Pu%jf1e!9C6tzQeHi~lYXePTW? z4q3~T6a!P`?g_gK6;Qplp5FDDg?1eb_txwJlx_ASR^y*RPJ1P>tS=+=unbbd#88K+ zrWtb!!Liqvzb!hBuW7nXJhr(JtH=f5EHa(euy0|TrX#H&_O5SL|ajQg0(EeLE(Q+bQ5MPS)JA;xnTk+Lh4&V8tW4oC@ zn;NZ+O|*fZyLSuDXLsOl(K?KiRjN}vHV22V4sy3Q2Vqcc7|sYWrowyo2T{bWGIE$-#vK$*k z9}%4+G1&M+18Wk5L3?!;^gal|d0YD6_Qj?6^WA1Nketl=9*slW?Lp{MuK+K6d{GxC z;F)iJsJ$u{Q}<**=SovHENFyrHl5BEkND#+$%*_6>pZwvVTi#C9I$o44k*l&6@0GX zFpx^3VNDuyQ>y@527{rvFai&4I0&KBXFyQlV6DB-TTq)Ei1RGp!?tEM3|a7ruH6`m zhZh}!9JwI)*!~M-f9;};HiXGF4adBjfIr7-!IP}{_{gw~2yU)N?cWPfZ%!sksEmHY zvq@+(Ll{?2`VG(O%KtY> zE|5U0_tNZZFDGWZbvb>#-5K>CN8!E$CxL$zN4?+=oRdt076%KQl_1G1xR?%AJ43Ll z+k)|&Ylk8_c}!p00z&Q{VffR+tfKHyydW!#>-S`lK8yF2u9%HqB-asT z15LcN^(-0Z5svM)ir9N&v|jJXWT?3-mhY$pFN5ch@#8VnKUQQ*TXnEoVi|@x?f}sV zfQ{n<@keK0tu1L>bnqlB=o|pAEyrODX9b+jFH-Lv3X}bF1+_*tD1FBk$$U{h zIH(F&Vm@=Gb}?$-NeA<}%c+ZACY)UGhj~7D1C#>oX;Gdd7K>aYYqHl>Ka{Y-Ei2qG zZEPD{v73j@Cymg_dnsy!H$%GcJL6qtBiA7T(&Y!b~*=cYU z2Q$~gotQ{mB6AUA_g+D(>348f8i%5bL(tAH5Z4A9;EG!(FtL9J-nhMuf1U6eZGYF` zs>@eUOJyg{k2r^CH0H1-?+99s=|n}bv$*DX35GX_)y1aGMZp#&K6KR!tj82AlE}vz z?bH&z#43r`zz^wRHT- 
zIN*tVZ~D`21A4Bt#BdQ?s(i8+MGk4=`Q__SM~}t$M8I9)%hBjZGxj&~VEeoX%f3Iy zaHSZkrgIW!KhHyN^}YDLxB_P`{)8DT+iO)Wsq#|g`|-!`$Mlzw6Fw3XVVnG|aFK^E zmWBi~Dp|%

+d4W!E+~y333|Hr9)dThX)0R^VOV zhg|1I4Vv(22j}KJM3#0-veVqQ({Gg}Bx$o4tKP=&4pD7Hf5I42c@^m3H@Rp)>c5p`c_-SGUWsgV7IPfSGVrlB%Mxn71pTYCj`DxLwAktB)Z z`^kFe`Pe9Whdi70yjFCkE4d)z!vr3zBf?!@P*g+~&y*bi@$0GlDz95m5U$F5@2Dfj zJCpg+wHzKf_7|oHtI{og_lRw-8l6xU3wG1287KETM(oXFTB`J(Vd_0WspdQw>ukk) zn%XijOc?gw1giam0Q+oeA1(iqR){v{@8Qe-y*f z(0lah@FGUDXC5xXJT0o=_H|RV$Yedg>CW zt4*RG`}0uWd=LD#>V@>;>C6kkFqzZ6jog#hW6ur}Qt~N*v`v0T4=PD8VPon@*^9gM zh+PrWWTVZOu4(41L_?^>+|j-3j-Wn2PjN2uD+I0kWI?HM7B~%_V>}Y_!Cy9#Jp22C zG+VbZN*7y*Q#22y|6L_b4T`{2Y7!5{LRjw=3+CH*l0ytf>|#V|*4S~d@<1c;%**BS z^~9;BvJ?&&WWl z9-rf?e$ON(-DYIK(3}~+=Lk`^r~#ihKfv%yJ19=dBzbzLIR9Q1Qr~fhG&H3{knb=P z)Rb&oDz=Wv7#jpl8-7DwffObM<-^TgR>Wjz5rYZIz|2T z^Dsy*Ng&%7CXmFfUZie8t6*i&9@0Oym*f?nVisTj2hEEr$eO1@B*69qq#NbHdb^`A z^Th@(D0vlQp7sDbCM1ESR|U9=C@|(?gzJK9U{`Ac%>n_*6dJ^TuXEAia4;Uy=F#g~ zBxt|1L;F9cahq5winL!tqpBMidRd%37`OrDL$9EmU~~rFl!j|w*ODQhZru6fD84g2 zL{kkOVCvhg#CvK14*U*fyEbm-^)-BOiTN2kqQqhB1}B93VRWIk6XJ?{_;j8WUAQ)g zDwjM%?M;IymUxk?>QbUh4uqmxM>87O+`|jq?r6qeMwhg|T>sS?v{hEcvyb0m@F*^< zd2$f#YZ7qLqtMfRBZ};6C1)>>XFqID!OgG$)l(!286>;qiy(ZVHSP1vbDz3LWnJj+C#ry?Il|t1aRtQ z5g=WBr?xtAFI?6%VOm~}L2Iu9*ioKIcI|jejvtVN&ZUVYBlRJ-x%xbjEM7*J^^}rz zy`PBq!&UT-FNLde1F-L#7vu)7;uQ@=FunX6aOXb?)QoqMxlId+)a^^Kpw*OY@;?aw zO4M<3=4td$Gb4w-?k3;*Ez!f?iP%(>kXaFxr1C`uC>2FR|KTT~y+NJ((J2KgL$3tu z{|%CB=dY4`b&|N$NQ_LrIsra3)Dz7ewNPm$fj;x~h;UB_NnIUf^arRPJi z{m5LlcX}$mtt-ZLG70ST?uYm}TZ5iAxrPVQw$lqu`=}tl7iY#6Vt7V2`klRpPOc1! 
zO!$cw8>+FZuabA@x=8>2cOH}emE!J-Gn6E@k<`Y6XuCEPJ1QUJq4r9=_i_rol$_14 z-_eii?qzf&-~t+7UdtZ-auWCLF2dY#r_lc3UoN&X9OKVShupjZJOedYV11Tf{ptY5 z#E-7$MGKxjaTt#udVznJ2m{~c#A`N zNW+>sus51YCW%|a%B#BM)7bZfk-Nc-*XjWOQ{H4{zX)v{sU=OD8$tYQCam{723je% zN!Afz>^K_+t_2yOa8!dFNvnpchp!pUmH!}pTY;dfqn#+VT;^PNGR(B;LUiZq3`iOX zBR7qOQTXj|(rbDNhBH5tn7qxPv|xY;Eq+wp=XMNMi%%rYDlOm~69ev3^x+7~;G6hF z(!W)RxE{%_ePKkwu9M{=Ufw3dIoXWdwAYN5|3GSrP;WJa$16=R$FF~zpF=Bow z4ZF^eE(-}d>(C2Qc1w*;+f+_^6(@p5auKPV^q7b*B+Nc-SMq(e0{O8<23WrstTI(6 z!OSs%8TXypYSmI}p0tpL@!>Qg=`&ZU^O6|bDPZTFR?ai^B3AmJ;g*J#Q;oqqAYtC{ zt3F#`rS_J|{=5iW#pLNg#sq2{@R-qjY>9EPb#QfH1p4%h&xHoi#FGed0GVL+?IWQ#Ovy=M8AmmtbyszAecwUQcFb z1;e#9*09}VZ|%o~ohada6{79jx$Aqh$g_9fx!vDaaV7s2d#A5sWWQ_JD%0D8?`HBcY!+5c|#2G|*uNPG9Q@ zKZjHk9kmT*=HD{r<4#f~MLjetTU7hEdofH92AD-K~!*X z?ZbEv-e8?Jd~Xw?_V*CY5{KdGE)K=*DYL?A56*Yc!CjjE=$1MIO=mgb>hX_hOw?nT zwnK#0Z8-_8U3KX9oFO~gu8|+T9E787sI}A2DO}YX~$k=SyQ`k$@A1tE@FP_kbN2XN3 z&Y&C04M?cjKAJP-3z0fMg%)foC2B)1^l9@*Y^k z=jlg0XU)ZL;)>94-G?rA4j>*+dPw(6Z=(3Yn3a3B0{)6mhGE``#)L&N3vUOb*^h1X zPK_5GJI$XfP~OhIcuW`{`!@3Mhz;#*)1~jHkFPuT$A_NYRzrGOd3xJ_8Eu}Mc=bt& z7Aj;UQ{`K>I61?SuHP5ScwbB5^-b=Aajsfj_lN?2elfw+=`M7~$4TtA`zMIlJ8{

3r8j?9kuMIsQx|%S`f^jW-*}C7X3% z4(lMSm5@?)0_hIt$Xk+0vK*H|m%J%(llx%6*@&2|b*)|1JV=5j)svW!AWpMhjx#hD zB{rV_;M~H|TABQj99i)Q;#ZAhnj1}ts_t@9Jk&(&TyK)1cscbf zsdF)4bZ4C+L0{yU8wbM4@RwXp+N>RdswKJj$%e!xdI7WvD7^hD#-{ zV5l+)l1A6rdr}?rRqLU}#?xrhx*{H2uVgSz=IzY=iaPk+G@Ueo3;5itgodz4PP$r@lxK*e zNf%%H?q4GqJXyd{=^G#va;WBj-*pm}7((QX$1y^#e-}xlghSZ`2VyXA86p&Yxghxn zf#I_^aOwF4u5Z&gdU;$4@s?~O9?LSBj_krwd_RS&v+Ml2;!~l^v{uXolQj<@M%Eb`p4D^X7YdIKDesBT(S;yxZb(F5 zjgS{xTVQpg7sNeGBh4ZC47H3WNy>R7!uk%=8u$+6R!h*+&5L3B@G0_v?S^-sUJ`$5 z2old8GQ*}tjOyo9sG7c+417~#Qj5w+qmd(#3mRhjl|I0lNhw5hYdp93VkHO#wJ#Jp zDbLw#aRE=k02v$_?YnU?@Xxi9e66g6&+EEKhj}45x8DNQ&Z)qJCxH9mJjhD^L*#l} zK*+qm)<>m`=~&(mbzUOa@cjfQbYN#q-IjVLaWISw=>LIbF;|(SCFUgOYXtbzuY`dg zvf#P<2eI37i4$6)!?>2sg4ab=AonGQ`F?wJKA0p+dY|M89zH)q>UNLgTD>2Uyw(IJ zEQN>W@29vCSt+cyO@q3nAx!DEBGTX}gUw>~#Kd8xAZyqU?2Vtn^vYh6VN^`2gpflyru*uO7_OEU(D(mNv%l7C7JqF#V~t2j2!`pP-iye9=8 z3ZQg_Ke4%A4=?R6lF}9bNaMGDkQ40X@GOw1!Dcw^pUouCyh!TSsMf|O>A=1#^~50i z7p4)`#Ah85hA=!y-#=IkCKj`6C#l{h=$i%QJ0gh*+H#ZDj76anf1%^|My_;kKeKgu zBI)ay0J_(mVNE8kyGUuMlM-N-}QNf#w-g5IUAn+t@V>ZY7Z<=%TM6$jGhMHE{++ z6n`WphmTLz&(Typ`-e> zyaa5w6Q-r@v7B>OEum*7lba`Bp_OVi()14~cPt;}sz1=fiAS*R-+r`c?L$e;LOgU@ zm;ajYjZ*pHxM9<4bZ^>;m-#DbG}1}8El9;v|23ohn_=Qp?~E1+Z^_d?pU@$3klgL~ zMlWI?UfS>({TF$o-A-pL8udqvM4!iX@kRJy`a@h`yo?`CAH=}&ViYyqS(_7ckEn_~ z#1~)PFt9NUtE^6>kC773kgjnZC8C z!@hZODn?96Pz zJ(pi#^@?G%zVn$Zd>={524%qSPzi0kbC3VE--0ijdWX=^6x6CLz|HTsk7`(_XtE%S z*9mS!q2?02JXnTdRgKs^_Yn?62hiByM4V1v;#I##7-2CMDn6W`D|QL-QHQfoQ=^)i z{JX&j9HKEZy#lY)?cptrqhN(-2Od9`gx$NOF)X`)DLir=zE;Q6#nMiw`)oIkwkv#> zln8cR7ja6j9o`!whVR#`!=BJ0@ChD{HdpG1u>n!<%PQB0X|7^CMd#a*!)ka$xM z&n0)kr0#q&C+!sIXfMM_!%4I%ZxfEwtp;w(Yz)W?#{iS3pw`0DeTPor{7qt1WkC;fLdGs!}|_lRQy*p&J0P# zWk-roF47yT6{L645k`tKrDkTu>!9si8Q?Ay|` z=U67%9yvg_#|mSc?*BE}WoYVu37>IM=&h)Q4r0Q%Kkz*EOrDMt{N1qQ`87QKHVx04 zorQ(5V^Du@Htw^YkJ1t=u{7=y&if%lH@3(zEx)efy-_T8F^j>R&wtTq_HHaSDM6Fv zRY>*1sE(uDDE>=^4NX(=-3v9WON}OXE8I|Koh^Ev6UN^*voXIn7%j%%=a#>|h=Lr3 
z7r!~eS&r)RY{vxRb}1UoM20{?qXyNyuS2w>0=^y+@Ztwg@GZ(|0!Kq0mxoQl_D(tU z^I)+x^9tyO55SVmt1u^xALUm~@e+Dr$?;G4cv=W*?dbvYf5(}+-QohzsuXH)Yc)Od zR1-t**kFeK0GYEx45ig=u&-SK*B{M;stbNN^2v=a^|S(i!?2YeW~A7}KBz@P2wM*RVZ&EJkgW;)4Z# zsOT!nzj=8Qqh@=M13wZ_z&(XyCPC=k5R4x-xxxCRJvh_o4&2#40YlU~Aw5=_m0uEr zDG&@(mOJC$yE*t;dn%id-$SnSm!qy%D6XCQ2P*e$q;@?#*l$0Ay&FckdRG?Lw5-O- zEiyRO@I0C4e2NjQoC|AxpMlJ)aM+;bgEudh^3s~2VAsqt!bBMBKl~-C``&`On>znl zaV!p&yyT7;)xh4Hv2bzTGH6<;1X7ul?7m}!ySMFQqIJs=pP6GAFNT6ut7y<$HGDKx z0S|2Auz5~7KJREIX|^_q>{)zjE{1slGB`6o8u!oih9N-aC$Oe2eh+N@-mGXD?P;C(yl_#yDkEGvAo zK7hSF>%rIVJPvi|;Owb-SSNM}GnI0Q{h4f{FSHRq%&#MRcizQ#_lcA|4MF7>c^Iy( zf>p}0(eAspTWzwA7GN`Lb~5;9)D-dZk#N;2Lm`u%vkM@rr?fe zLr#&;TIn=WArbH0o`UaxZ^t@=si-yTWBK3_Lbd7|aoH?HCogw8(z~5zZ!)HD6#wJx zr)lFqxkYej??rGuDM6Em)$mqW5Z`Rr3aOWZP)h1H{gq>i<^fYN@0U6jtD4~xjzRh6 zGq_{oLVV7pqm9-eF`2&}O}`6aXI=#s(ry^ByMnLPk+)5iK=BL3_`TPMQ<D07uM|~ug zqS7erTtsd}W#ORXWK3VY73Uv4jN^P&a9Xwnuj=cJtIYLjgmft~nj7(pdKgOFn}VmK zLh(iVSpM~pH!i$>fE=8mgwwSy;+i@|cG|f@^zZY>D-+IQ@beg4;3M=MWRgJK&xv!xyfL%TIw*;TNhfrytMP{>Gyp@8BU_3HI${7Ca0~a7#=vUS5%d=}RZ`Bil=H zMY9qs__c-NpFLo{hHkC-8%H6#f3?F-HSM*jJ;zvFuGAw2XD5^@fS$ zib(=rk!(O;znV#fUii_+H&f}_k-L2M%1!iqY$XxdzlV-I-9TT?cBWFj)-1~S(|dD- zNmgbSJ+Wa0v6^X%Z5C(AQ(s|v6h4h|qiR(8xFY#%GlMQ0K26(}AEL9A6lwM1Ei`p! 
z8@!HDVrvqnk>K?@^mp7koFu*hgcXBnZICi0Y9e(0?`veWg(1H<-kpux7DFt_1ys5} zg^mb~N0&+VlrH^20{7d{qb-W8yj2A$cQ1v6j0(bQ`B9Orq5P18J-L-(Mt^U0r(@sF zqjqBPG;pmhIjC6*;xBh_!yZd;N%43(WBf|y`6hqrc55SlJz@{h$k~Za0J#PekuakM>5XE;q^{W85tTflDWyeE^C-ZP`BC(Nm!DwHi(jHOCr zh3LVRdB8U`vLsrQ@9I;nlU;e3Mu?=+U3w)%q+FN(^;L%62Q$_*XB=%2nn1l`H{h+) znq)?^G##9-N0RsaBNsPY5XH$ZMDln$cX(lA?Yxj>_~y=Jfo)9`^fuQ+{fF(4<2?uF zc?(eQNk8d~i6U%!D38y(n4nL+e0SFuNSr#8PTg{xtrYLjV`-D z%LFOlmL-96LlxMLQNIw?SO*EO8ihQ`Gv(&v#)@1r zFS-x5EB0`IBmbkbZ{{)u7TZqaU%fD(5YnOPIqp zx~Y-QO@+)7?I#$%U<)1Tyhy*U6{6Mj82`&kgCZ2Sn!D~oh-y#=0?#EQ#aFYeOY>O_yTofV{uS2nCfgg4^5f!?5a3rnm(nL z8uoh9!htX3TR{n6;5=@?vcE`a>*xu#JfHThg!lig9uE0Cd`3psERn=!vbz z=xEcUFJ{XUyPc7|qE{7B$^C@&1o4(*3)&N#*)f zswf{r+n<{X_SM?cnJSZMa7ql-Ta-t&8`H>x@$&468_M*8uL}F|tO9$*ynse8>w(|P zhq=MdOt5m5VUOHVV)t(qW&`Gapu_G_v`dA@KV}RGF!~1tN-OEix`>)nCCO4k{TP8wg zt<1*7Nv)8AN;qzun^G5?^`DxKl zsP35AG`#l*{pNm$Zu_%e;aZ9HydSMes?1kI`@X6($h5SeCbNFQ?~%yx920&$L40Bfa`=BHzAsBLCf!rw$L5SfzU@ti-Ch z?7kdX_Unc&I(tD7?9{m9CLh;-`kcq<60C z@)2>l)UR2bKeOUC6|R%!ul^@q_vZRF`rhCcz1tzp$3DM9tMpoENz+X_*Xjf9zTZie zP!ew#exwg_68P@Dg|uvEH{9NHhs|?pq<1nK>GVba@f#DR_<77eV(V8yukIP5>x`$c zVwX(VwH?3c?A6k|c}*_%-ns?q4sEo?<}021?lv8(FUOys&GBWn^J!-505uE{!LO~4 z=u7KJ*1S@ce;G}w*1RTKs+&&#=2uYX+y=NldKS7rzf!j1Cz>dA)B8>z=;P&ORNioO zkH%8^V*7oxAscz&TM>BA&J5FvCHVb!zrg%6;kbWwEAgEEk8QuTinVTNq?^2dQRj?C zzN$NjRUA+0uC#W#xIvM3`cX|ulo<7W_?ODmKcJUGm3d~3C_m3pj2C-5ssdfQM(u({ z`87i7{2#fEe3_4Oo$J)AyiRO4{pB{cPIg5k^eXnDShphozHK)zUMP<1o@LSfGwZnF z1!oy2g)pMoaY zDhN_cCJ|>w^$)d$;CbXNI1T8L)i2sPW}y)&t?LKP{dIz6Vnxj9SFcD;^A?hxs>?Lk zZ-S=kUodj97`CiYBod$efcY{^oKIU3h4(xJ6hy(m8c$;PUWm~&m1TN{I3l|35yVt} z0K2I&+^5*9M7714>;EMWk0uvFmQw|p_@V|h_cYZu{D}kAYJI5Fk{~(bYDV>%QH{7S zTTnWwij=vmAbl?$b6N_8Wc(O4((qwEp^j@vZ=3?7S(VIm>1cqfWB^x?RRXdO;VVQzU5IuJk3Q@0i22kw z2F=~R!hlXLC!R?mNbM-o@L?y3_f;p4MlQnQ#iBUr0>`b)ItQ-H?3pBk^PJX$Gw|Wf zDRMpX4IK6Ag`dB^!r!V?uH?o9-c<0M%zNm^g-qEEn(e!okIf99+4hIL>p4Mm_ui{< zx#$J%k*cG!&{Y2Vr}M;JTL@Gwo3O`4ju^ib;Xh8R25FyE_TdQuUViOG!V~22KP7EE 
zv?qY1H4otSW)XZpX*^C}eStqTu84X5TZk-NW=wiIgmL1rT=*sX-8jwRDzRU23vM)7 z*G#I^rJAO`JhvhX)?C`gh_{IukH0PheO^6alN3kt8}AacF>(}I6Cr7l3%U7n3b82; zV2rGP!=TALV(9gT*;+bG5>21NM0Y-{i`&zC2NwEuHsN@Z-r)&!{kcRN5+1| zOb8mAK=K9?NQhn<{1eEb`MP$d`erge@Ysxv-1e4!ikiVDe)-C5Sty4|S;tA7|Gc&78T`;@HQcc9ree~ zNE*$3Ih|&fE{T9Os~>@1rzd>veMN6X1fas?b1?iI`I$kxadU;+s9*msH01@b>s&|Z zHK#Q2OLV~dlU%UnRXVpJW+i90JOTt7gPeog3)HLM&ia%H89%Gr3B{MwYdo99u&b$p zQ=Q3RM#XoC5g8A5U(+#eQ#$4JdE=?;W#IOxXX@UW6};&m1#Gf$#c|wTP~3WiO1L`U zfc{|yuSUQEsad0#F^eAEJqFLce*?dZKXJ_DlfbQh24*uevDe`ey)n~)_4(*S9&6f- z;>=UfcVCA(A6DekmEY1`lG^wodlJ7ndC@3l5}@o{c@*1N10J6LYMVVa;D`8Ir2B^m z%Pl_*BdZ1kAA{w5%3?*1c-L;jOkpK}4+x7;-_E?S25%$S6B`(!|L*AQrLh(QgprR1_&7u^5y znt6BoI3Yq!blWc-V6^lQN;E+ye+Xo)$8+OfB(o0FzjCvuk3h7~RJLnxJ;=YG#WtUO z!TFAPMMM%WVa01H64uua8<{TKX8D$ z+m-U>vlYp^zxMp(3q2&*{Ups+5n;OJ4x#z#A7EoSljQf`fmF9S@L2m8Q@Cy-PMq}t z&5m}1s=F-cf1HTL>SHj*dNfOM;~FR~r0~2+8l`8?M@U|S+btYX?&v&Xk<$#;JtYW6 z3$XF~ds_7(zs70z4IFY@gImUzLA0+fN@RBcmJ6dya)luKL=$&8x*bv;h4Io$Z^EPR z|3Gc-3+DCj8m>U6i8!y@M6Pe1iH?D7;BKdg3AG%Z7F&Sx2MCk8mvT1SKZ1z)e6+e} zf=_*e$h#aBRMZ40nI}QEmG~0N2LrWT>?`+F`1<`a1lbm%!{@afYad9tX9DNnGMI z4f;U;In;bKz_d-8XxJ75GLD^e>$XU!a*xDQ&sX6a3nirSCA4Smc-ofagVj=owC(QeqkmLy+B-aXN(P-gX zV%FhD=WDjo!=oCy-*z*)QsFdncC0im$)3m^k1C>yMZ)Nm9l|*Ow+MHD7@c$UIgyF3 zC+pT7;YW6skYvda_~b2xniK8{mI^K8buK-CmYyLv(lQ49>IQ2=|N92FZcL-Xq6M7M zEk`iD6pyWztKe4eDN>XpLcU#l#SY~?!qSm*L{ZKc4%p2lb`wttHcQmvj-|Iq-WW5e zlQ*T6M{4nsc`DhIH^Th)Rs_9T;)%Vt71{mb2k?=xBt3IJ*h-0``n+mp^I@UUG)V{{ zXF4HVPlr$Ys)A+y&EPEG3|pVd@Yh54!L6s3kn*;gd|9!VyOJ--XGw1W4WU-@bnD~V zei3)Lqjmydm$nx-8M zhAU-pTwdG;epZABZREbt8FMD^52n6{)7#46S)~o=9k^So^hXG{JT|BAPCSDiy}9IM z;|-J$guv~@*Mk3|E1`x&~rmtw#T6RNye zhz(WEfGM9Q;O`@PXfGAXlpmW2iw7^jGqG}5G(8$)ytFawmIArUb~B0&<8XbnBmYh7 zAntWN1MU9>z>OoT;d5X)_>V22dHs6)e~waog6;_Po*##%#!|RNRi55&euQUse#MR1 zHF zDud2bf7Aam0N#d9#%Y2We5mq>obDEb-xhgbyL*^fmYcyVn5l3L-@lQmhj+oiV>y~P zj6_Uj7TP~iCjbAJu%>d=f;dZ);OR&g>-c36|{!$13XxUa&Jf@F# z_T7W=aW`N~+6oNl9>smrC*X`jKS5Du0)$KTgNl_k``lEHcuDHl{qRpAf-PgQcl7@? 
z@IDGt3ff`cf*>k1PXrvZZAbG>*$h4;qx@|g{48n**V`viXx){XVpTOX5fZ`tc~00l zql>vNs>*+twWA+58^QfuZgkczNt`?ClXt1|!-4c)Fmr_?{ytz#|J`~I^FEvgclRlf zVs3|z&;7?6oIZ-{OSdsM-CJSXIBRUSDui^2ZkS>^gME9+mhLulrv1!vym_P(_IRG< zzu33Xpf?f3MZE!v7b^4bEOcr3pa(x9t%jw$_ri$vEqI%p1W`+rdCQ&;%nFT^+Ilqu z?$wINpu6Wf+^8~v^x843rNO8l&PtZ#Y^cFsc5y)V#DT$we|(*Y7s#!zM`^Fm+*XUD z)MT0frB9!u!-7@tCVv9%_fCP+|Ba^#d2hK1hEwRi8P`WM@M3gPc^UEjG?DD6wWJMO zC5il1F`8i?N754xk^v!m`pQur4z*6D58~c(O@7A&ip$NYhCq+Ho{FV`t8%$$Ia6FU znwc#wkO!|XmZbk`3So;vnA}ed@Y_>`bf_y*{RO|_twV8bxN8P!ud$|L0so`uOyjBg z+Aw@Fha@D4QmKR@MRE4CmV^eB(!Xdfjha*{X+lC0DhVlbMJhz(>}PFh5~Wm1lO%;w zDGHT(_xsh)2j}c_thMgvy05if-1`;4SLYD!$vh*xE#{g#B-Ym+m~F?y+)mL|G@@nI?3{%;~l88cmW;_K{^2|pf_@A+bno|%)}KaG5)#uu#fIz$#9yG+>r z;V>~_JnL~-iO!d*$RNIg``s_wMo;!C}Vjej)e{!?bpdp0nu_8g2_oXGw3Rc2on zt%XvX@hm$NnDvT{Z1(=kU=mluE|>+Po{WG^OEhJdW0|aFlW7%#t@}OZ>$<3A!Ge^VC}XTb|WVqoYRK!qd)rK{qqU< zY1>~&Is66g76!3t``kfs+XHUv>)CK|O(@q?c#l0(Ok;*`3;5_kgXmMwQrWxzsI14m z3IY#KM0uGq>pcD*{p1oUENozZaiqcYj{y$_kK5&@eHZIIYJ z=3ibmhI-qnn7?NrNz5KWW~o)ehLi@_tn5Pip7djCCmqB+x(3;JVKDix@GU8tb{>8X zb%9pazw1#T?;IPTDbBrPXLFlsE`kkJVBT>aWI_K~ zP^M-@KCNDlo!i2=C1=vvX$xKQ;;Y!VT|bw+%~->aPdE?tQ?7D)SR`B7vW?YO+K7D8 zNX+~=hPeFKm&6wCWMk5kxeZU4G_iIv8Foy87>Qjx+jlA?YL_=z?VAeu(y>H-)kM;z zDRQ5VGg;AOFEW48e*Q#o15A|V!@$`xeE#wThWj5S<*iAw`3p2jVV4m;%yO95m&uC$9Ebmgi#ml`MBwRIJ=(#(>!j-^3~<=kN*Vr^^zmo za6`(f9TR1$Ca-Wxsy=J>4@Z0RzHFq|Up%&oGH^otv&caLVN zhgxB0)nW2`bT&rs$;aJ#&bVaZ30!A9fUOA_z(zK`!+(W@{S(*HabCfo*58ONT;NPP z^n)OLnlm#xV9%N@3b+kvL)b3!JJ47SWSRXtyyG>TSe@Ap{=reiV=iTD#tnh_!X@s; zy&K^AWfjM}|AoMj=HmLj4B9`Q;{1P1X1fhlpe9O#)eKZ)eu3saddDJ$Jaz+QY&2|=is1z12kzoh0s1pWM%KUNZzl&XZ6nT;KpMpe4z|l)d?a`@Ryey{tt6s`w3Ir zCGb3L1F?{c6xY?w7}LP9@Ku?(_Um?;t4T6$9k5o~7WtPo|5|{XTVBZyx3mdqk-kLz zTrjNcxFciYKHZo5$h9fmz=X*Q;ONvs&LOdlFETL@QgkP?znku%;SVKJ5|Gbb=v6D6 zGxLM!%c)p1Cjy^N*bG|VPr{k_gP;-n6BnDU2D#`^Hm>~$wre`!!bwkH%HaF3s@(!D zHhdL`R~cEn$&Qo_oeK*`e#9m9r9zyF4=qT&CM54QAU{Wqg}MRv$d<%fF}p5epC)F( 
zjMqNURCNYof4s()z3GC-x)^e{hC=JDi?H760^e!90;@yBUP8b!nCQ9$w`NXBs&64=#;(^)Z`To`Xpuq`E(Cz<)lcx_(O7caKMrC~ zP9)zR^(KQa48!%$Ldb%~K4i>^c(QBA2;uP3P{av-HcS_JnU#|anP2M^m8!)eXea5E)|u2TyJgIP<+ z2!(j?NInFg8l>cYLmV*-olaw7b`ck;A?pb}L&jX-spa7UYEI&WpD|-eTmBOmb~;y> zKJS;b{7yT&c3FX>)t>~*&?GW?!z|M0=K|sGFo9jXr$P=u06BX21WEBTCW-g^k`ua0 z)bLRZ*goz}4%R;qYHL;t{_k>xKC8m%(9Py#@+NoT^~WDDEA0l9MV%w7hXx4qI$Kby z<0Y5y;p@TV>j+aa+sl;G z*gS*mi(LW<=~?VYP$YOSvL|swhrYiaLNa{xiS7VNR@7kD`)6)Zov!5qs)q}9}l zOv z+-C>RR}*!$a`wAvF}ancORv=q6|PP?#dge*l3wdwU{G=yH~K*k>mQs&?$u1T+gDry z-*>x_%ljHQBb6j*UJ*_zbTXl=`ye#9l#zKEtt86!7-;KT2}iU-$n5p~gdLG1an9TV zMs)Sa@dRgK5E9_M&Om;a~XfT7#_5VC8yUG|Vh zs4hCj?HiJa)jOW@XRcd;=e-@6-n5TdO?5(@B{kUCevzog{(#V^z4W_XAXM(WKp!g_ zW7PUY$Y0n6%jfjR@k;%OwlI{X!Q4`2z8`GD=$8tKrv`G5B%o zQ1IvpMwy)0^|4mNptc?GyCi~9ybCRB|3c>NHre>4k!;}ZiP+|y2-kEfQCakxRKAJD zwv4CTn7Lc9F@6@Fd%m88%}RiM9Zk@G{y;LLj~}0KYBK-LX*p5T97T4FdIAnVPmoVO zNw~5lm!v9rvlqVtnbSsR@Jm_+ind4iy4^Bn2;X4CaNwK7E(1?Gs1ZS8fN5jB=*AF5Qa$`U?w3Y0@7ptB&kQ3P zGoBNfRDBY7L5FM+9Uzg(X>ckjn={T#XOCp1V5QK5J*fp)p6|`u4|~OVZCnUj9jZ`% zy2$=&_7glmm&u&|C=;d0Vb~Tk53Uje((^T)xqRG-<*5p=EYX4X-82d`)n||gxo1$- zDT~gXhNQ(k1f%VvVX{g-b|eJC(LcdtxbqP#{ZfgUz09~1(|L6Bw}DBf+4y{9HE1f$ z2Wxj5^gN`>S4!8ws!~njG&@2lO&vs=JP*U-)+bzfN^cS}?gY6X9?Z|I)Fy44-{6sj z=g6tr-;mNd7FON)Dr;F9ha-1x!l{c|WZf0lrKKO5A?eIk(28t^HBC36_~QWD=95gm zRjwl^<{!kSp08xUv@9??bdJz-+gOQ1G+ESpBcX5+<7{?fuMykqw(ij-eO(X1{38NJ z?=B~)p9GTLUdQ*`uZG`4%}`o_TWU3&lYBKjl+}$(*r52dcz?>EA=Ha zE;whmPwto?>lcdG!oAsz$=6|89}}TzWhN>p5-7l$W(W4nN#Tlm8IBHiMEV!#t zs_!(Lj+3d;_4y-ctzio>EgeI@H#*R?)0#B=urY1At}nWj5(UMkadcrEp?>2O1ciy_ zbi&Fvq-&rFHIO~Rkd@Zp8m&&}XO5+Ve@SVn*BIJ<-%If5H;R@u^rzFUhX`-$HJO6S zbZUHT7TxsSQP2?`URKGI>BqiWbZEIHRobFQHv}!D!Pj@wx~IdaW!(kgr~g!{q!dC- z^~Tc7G-s-OONSoVQN*&+XVL$&RBB?5qq5$BzTZ88UJCG_R?}RlZ0cxwCw>ThZaa-$ zHJ>0%^L-}l*HjZmze*9ht+lA7!&YHhl^k>K?oHh$#gjRGwh1|2Y0Po1q97ODgpczU z31q-5+V`d%eQxGR^}kl&;dyTagY{FX-Z5>eFhi3T9Mz|rrn=IJwt>Qw_3qSvjXJgF zA_SIiDKy@$U{B?ph1w|tsrpPx(^~eU$C`J-_VhV)cvLr&Je*w22lb<)JOb&bVg6vW 
z^%L4Wa$=K5B?$ek^kBKX8=q<=WpO!L%>LmIQrB{wzm%nnBYtW@+>Ti=XRRD>6*HBX znhilkF_V3^Ww-G2(@^e}Z@!p<`iE1!LhTZKiROMQZ$WoV6z*2A$@)gzd5ZOKYu#SW{Ul8wE_E~ zuy!irX1Kz?d-Lec?0&3zXfR*w@RNTxZ6ICy)Km169E0~x-8iOT1I~)}pc|5mxU$T- z=y}e8rOFRwwXga!Lnihdo*oo(0_=rJK9|_#jk@f8iZ*jn)&kqiNa`te(*j;NNhhYA z5nc_xF1#u|CLEp>%qFUgVe@}iLq&oWdy>W*W=>Tp8NWWjt;AR+1ULffFla3Z>#l`Sd6m1`CV8d{F9@5MD?(x6#T zQ)|Qid*VcuPRh{~r%z;M`){)LEKosSmA>xMp-l!lbXD^QGEdQxidiaM>eY*yM7||& zw|^$f?6jy&=?F6DrUosZDL(U3Wvt71ND4D-iEF`KLOT=rZA#|C`%+!n@4{WO)K!bV zih4^rVqW5J@|zUKloFZYX>wxne8DAGg}Q1r6RU^8Lj1#@&^NO`jSe14&kcA+?rQar z)dO_s=i9uH*T<0N_V;1ui$~Kx&us;RXF62jkp`X6BcbYXH-%~G!)fi|3?R22kff2L zXxNEfFyXQx&Dz*bu1*?EEw}T;zke0kCOZb4p0*HX{TKBocCe(*{`8bO3T~1M?2Sjf z@LGw|iRRClj>!Sxuv-PO>~AKxD7XkpA}6wEt%Pc3NGQA5hQ~q|k@zQl==*u!$lcyA ziN?flB>43sqWj}I_WV|&{ZsqU$#5wDzr$i6K({^v0+vxWfe()$0OSL!JpM>b&fo# z4d{ZyuNfp55t}jp$46Gwr zck&PDnl^@8y5WS(2jvBjtTSkz4&*@pue zg5!tB*z4ppqI|so8`4Gyt--H(z52c+wYnMj;swODE1a}k7!5Y}J^0!4x0A<7^2A3x zUoSZ}ms_Jz$a$-8CZ&gZaZ6S&CmT2IBq7h2l0FL#3Hz1%NedSTa zy`#nn-p58T@0mAYLdRv1`5Z<6$%2Hnm9cDpRyue1i76?miN_!Jn@QI&Z+@YL9nrSh zOf33G67PK}Ou48CT6Ya3@>+X{P17iD-pg5JvF|wIZ&iRpI!wr~LPMf|FbVHyIg+QH zD}<$8gV^Ns!P2}wr6^gNQetW|pEj5^K(hWR;p%RC{O`a_GA`pLF>9U1wi)hYO2Zx4 z(mes}{6u+NoPMA8TC2!LOzexJgTk3~Ocs|I-hl2$D`l)H03CHtOLKCrV(i*&Z1h`Q zR_m%y%|3MU%WpIj->Y$)EItSJytfsS&)Z^9?NI!y_yo7*D6za=>TvNvEH_%rM52g{ z4DK8+sEK|QeUBRcN6rKilQx}Sua|?jY8|;vtFGaRfirP|%29T1qC1(Iwv(k>dg~a2HY6>J|GOG?cCV^cfwz?O55Q z8#wElA3OKv3Ou}&K~BtADfF&?!oSo$hWEAB6C*1FNeoSti(7CZ#&Gug^$I4O-CoW@-UA7 zRJwvy^qDG1-?V_Mg+Kb6gh5Kn2YjO1!tB*C zcZp>cQ49!FA*CA4%yQanxW4ic8}U@^w>oLEGiRNpF#&O~W!Fvc>Z#%u_~pR1ZHI`R z=1NR|Bqh4bFOlpoB`nXjH{pC9!O?&z=s4y(mMH^-|D*7F<0d$`atMj_yvCM|J3~JH zGa*7{3~7(6l;xgQrJ;WZlB#*8CC|Svz@bA-1fLJ0+o)>`s>~LV2Hy{`Qym!0G9b&_ zw20brH7LGb4RQS_v3z+0UN&D~*A}!8hrl(EaI!Bs94m5V!BU~dbw;Vj?$dN*z-Z8T z8BLble87zJLj_4W&mZ)ckiB|}#9Zl&G;Oc5n35_BDfj!6DW*@j|9|do>aXM$W9c=n zCy(+`&$!<2zsVHiR|uP$^yu8tsmvf|q~P4LPt4A>N>`hYhN;UX^y|b2uM$j332Eo8bO7y?XPkffdKrlAggdOeSxYe)$CfTZx 
z8E><3)OS<%UrIY1%&vx@fPr-P$f;Oa+8?~`6~n@V^EuD)J76bgjy5@1|FPeHAEQP51wLWSJ2uKeMwTxR;uo#`Vt0M#HPCs_@eRX&NZ-64 z%SIZX<9`)u;DvjKU_td%DDsO&8~PZ&F1-VeMz`RGMGyDfDjast&qO)D0(k3~2g>>N zplMNn`Kv|Vqpve=YSbo?ZJ%M^x&UrYS27;^e1uokTgR-t)riV@M^-(&kgNS#z%&D9 za3@AZ@)sUW!?~{O@Q}k_EdG6)tC)6xyZ>tZWIyJ|Kc9?2tfE zp6HQzW{;8QZ1J~M8Xp{YhG+$Pi@d!&F+Qos?-*i);Wsu&*GOt{gd~~|Ii-iBPYrLi zsSOV0cVmM47IX{Vjn-l7VAMu;J2kbJpxAX8f-=%@mxm#qSn?nF{;wK*UF^~3-b(T^ zayFc_b|Z<7FG0I376WDvBqwJ1lAmr@;OKIDw&=!pe(U4_GUU=Dv^YwoExYgX$CT%h z;E97+&DkZ`Eh0FNY^MVKQ_9^v@q#aZx`$jIdPi2HItgdyY$4;M1yB}I344b2=NuaH zxB-`a*$VeNV0liQFInHl-5njw;j9&=n;MeZB}cGJd`7DOnd9vQ1>$HCj0K`wMrU^t zq?}*RwI27yjq~F$XUHOS|FVHEzgY~qwO6>dxT&1Pj_{W3I;Mneqg zEBh$^S#JUw1-I~~ngNy{S7r6KxA5#AasHc)@S^KPQnfhQeeZn<;iXR4Y(}%7S}j%B}ht5ic)(1^4S{a!Dz=tss7I=7~<2#YmzZg zyiA6BHICwhs*Aj?AQ*?$9)hlZ@}$S}1a^;+u(tD`aGB`eQOVkc5+65dOw2sK)U+SyCO^aG1uMik zZ8N-j>kBG%-k@nyfRgg1CGl%Epr+F(?p5A&l;pg|@Q=Uw4u#ENF0!3%bpyEttAAqY z?6+KE^eYVitIi@-GU4ZgFbu31&zC(bK;IpzB<+Sgv0nNQeS?%poq@?mEn{G1m^3YYQv^Lmpf1sSkXbbXh8wuYy# zPs5ydifrxja&CstboAE_;~tK0nU|I=U@WH%)P-a zQ?`SS#<5`9Enq`uD(=~72rI|e^Iu-R0HH^X)vr_JZr$!cUsE-#zk3?&Gg74S-CCgd z_Xqw`EyF)*!%)*^GqY(PF3y}2o_ zU^k?rdLH4;KC3XGbp|$C*Pv#~ChjRk)U1Ai%}q;igRa`?aDGcVFX^3A;+TCME&S^-WaC|NjgICEW^G5S&+*7# z7N4PiEm~?F;dnuuKW_aAUA>jzypbj-%zF%@<9_14qAK)?NI=huIU;|hf;X>?1V`=l zkQ;r5n-K66rd6NdT0cbcE64tUNfS?S(@s@j$Kd1Q`&$5r_dj)iCO` z9E&yWkB>b2Fkk2Q{4~8+oYB8TkkoyKWzrxntm7RPdq$#0WgX0N(-XNJG zm08^#j*=UNoQYEp7ZDu_N*m*#d}B2K@WePiCHtJT&-;gXaZ5Pbt&N68(RHBsehU9| z?+`ZmYaO}_?a!`c)v&^JZ>;IqgbAM0p?hi&{CL^`^V;3Hif&K%6h43*T0dS~Q_7_O z#ns?l8#$J;=?T1V{|0YoZNn)uKfxj0@BF*TfkY}3X9B5Bxah|%E~ZhH{fnFlchzp8 z|7y|ix^FKm`LTwpRartN+`G!ZcsoMsJK+rI?O6jY;|1V(Tb!z%i{m5)P}%v1OO3E3 z$24NVIBy9t+Hnr1Tm0s4MqlEV?@L36;t}9yu7~4C1*6R?dA3OFFoxy~WwDEng2eJN zuK6|r?2|6z+wN;n=p6%ZjTHF@vSe98L=KsDF9`OG$_MVOCtEb0a*h`bSx|uvsql&= zhdd_2qN@YRo{a;^`|xlQ^`IJT3dYJznQLj$A>89a)WrN4DY4wrVKboXXaO1jDtICSGpJ0rblF2jlTG zez+3EeWzJueXT0(dQ(sQ#2jGSgA$zo;R6ZnkWiz{`{amw3$b4~mMq_=N53R$&^@Dv 
z(@4X2ddzjb4oJktrW7+W-U)FW@Gr9YXC%yK6B7+B5ku1@3CCrh~GCei= zSTdB(%$HEb1YKJCq@4{Of0rDeqDT*~>tRY)zku_Zb?82{53@X5PGArx7%eU5rYk-s zVeZu=_ErQ5EZrfKT)t1v|Mwa^?uqwy#7-h|ccr3#F@P}rqr|}dC9FPoiA>-23$R1X z0=^p(=ZrygTo~asM!~LnDKT2`PCvCWxfxH9%_K z08%^B8+wN&6RYw#vhlP%+CN@OhCdxfqPF=E5;lko+x8UR%q)hU&r_tu!P?~FJWclP zQxd7!-hf&IN0DB)!pNbkmT)-z7q(ATA^(1ho&70K;9+GYG_)Ng&Vf$k+vAI5-cMsP z)?aiTyH!Bg^?~9H*^ngMJwi60KSOHQtRiVQ3(4;aj*L0#!Vmt|j)N=rGTSdUWTg1J zr+r>BjhX?l-YkKu4%tf9m^u&-vp#H>_kPmdFABalctO^$gSsK5+6?Yw+m8fW-|uO#FU$=t*=O<2qqo75q2tK%JLaUIG=kS%z8x;86+xn+ zD~_Gf%GK0fgaj`qzD_v^R!n^hgH!tP=il10cvVBNn8JZ2uLP|&dU#J5fdv5IT?h*c8^kAk%*Ep$=fe4)m(b5U9;R1`45;2H za^Z3`ysbRV{H;#|8=`~_2TagyuQ@hGjevrugGv4v1H9V0m!tZdpnYx)i!}Cuk?Y0_ z!ISz5F>Vqvqb-uR-D{1fRU-I9`THOvB#a~#DzG2u0M&dV4Byv48qDqxyWaxwh@1qc zs&(o6v2TcW+dYDQw~4y^K)U#i*r)UVL};oUeXjL|IIqy4QSHY`-+nbj!|De)GC2tL z&8{RDeUFf5>s!dG%4nLYFEHEJMZ~WW$)43})FP>clxZoG#Y=vYX`|HWS0eH%4kp4W zrb$m)|0Dy3iXFBRHKPC4fYzHS((qFfYOUQwQci}_e-q7w$wgh{yys}<*`81Ke$WF| z(}DDg&wXOS|0L{L6I;7f%#hEyf#rX-WWi`%D*0YZ(ro)vm%06DO7Ci7eqNbw`czMj zx{P3jO*@1EdtY#d$we&a-~bvl+lpNp(nVsPnNZ8&S77h!^X%Tg!F2f4Y_h#{0vSFz zoK2qBPV!hbxe}{GhtEqV`G;arAnXAq8ku^o@H1hkA)=Zy^S zadNxvfsyWancsh(AgS~rnnW8y(ukwb8gC3n=}jEB;sU>?NbwJ?Dc3Gp697yI6 znPsRh$6X8J)4rsEujMyrdvAjh-linC@EE6%63Ndf5$Esad0?e_7dmZfP_nGIH1_Iv ze8@TB!@R3#_f(0Q`z?pny*07it%u8Vmt$>inOuzGN{AWM4Mt`OGPAo)n7i`^q{0@0v+B;qweg-troek%(@SGRn!gV>oT~4E$z^Mu`-J{?E;-v zT?|Qz!LIS@OtnRox%ZFf{3K=Qr>O}k@0W3Qb*ZAi=?w4tZ8pDsW+uiCZ-bAM9$}pJ zFpPVB0{-kafz_M(;JLq-MX&ZC-u{^v*Zf+Y-Rw>R`)%TD*}fA$tWCv&7uR7$=4;HT zas$cKMA?!3(YV|;5Ch}faYuwIDL#EroaY+DlPeymbV>B|fB1=Qj{A7eJ9BWwj+byq zRl>sE_kn(xgnjS-55t>hAW=L7b!rFT$ypx0_t}owD>Ttsy&LBSuErQ+@ilwj;w$1! 
zQQ!R_1~#`yC55JZhEWn&`RC%?<^3R~rx9KaoB~VAo?wdiKj}qn1s4D5CKr?5i+sqb z#FF!J8W{ zfLK!jBY$s&@Z-J7m%0K_8r6;K-m0<>&l1qxvl}FTZ}S_jgu~XSv0(DP7prxe2cDy@ zh}n~aY~$K`EGxKxUqTvCq3JuH9jZ*0$$rDD>~7w^QBjs(#zUah3SRPEMaFseAg9vJ zkM7rl5g+RzNy!Zq9`u9rdgrm-yc(D3uH(%<-ozJ!E`a@5hULAy_`qA4b`!d~`SW?= z_sOaQmyN09mZhpf%4h|UB;J7^YAS3&)gk!HL{F-L5!j3U@zNVS*EQxFN|toW^eUTC z(?f+Ty_CbHm|Vg76W=jqS|!Z$Ag9n%#j7`*#S@}SPW|2r zI2$z%eJgahJ>|bavY~_TSo0NTip-&7WjV&bw1A{a8Q6~>Ahq9P2jTfDq{DwQ26jrg zW`z&%drAmCHvNear`|zs86P5GpNB5a`c5rayI>M zKx}CtZkQ)#SUR0tww~zOOWMGn?>NRy`*)vvxNrg&qg^a^{zPWTap`LZn43C zJekMZjcnc>@%>4BkJbq%G4=f{mY24R4YM&~iWz;Wc8xVN`7LAThj-w|+1;2P?=NUr z9b|9&d$LjcT~YgO2rE{X0;u? z1j>T!7BGkVC$VDWHL`d6ETK1@%?_KHu<>T?*bn>BY~NmN^0Q-X(9USqF>fC!{P!OV z_Y1;*RkK;vo-Dj(cANdFTg)!K`i&n037g=XiOp^2u#ZDG|0Z=2`P=t_Fn?$Xxg^=m z`rVc%dXlNU$_s09(@UB9WR521<@`w2tU6}n&`U__I7Q;+4>9HYS8$BJ4x=HH=$V76 z!ri7R?7v@z%t|_!)hx;53l`5~6Zc!uyTOOqxvK4~YJ!ar;9$nw`kAp80r#17ffW;o z=+bcc0=0Aeh|!T&e&kVG`s@}%rCqPlJm3XaxbP%>boT`9E1yEm|JXxh4$?o*4RNB@nb_Y3+7Wf4*IS7QR* zxMHZVPg4dxxrww{K9-s$Cey*02k8jQ!!&=H8BN)JgjOp=3VUwF(?NBK>_tl=Wup&? 
zefHyYghWDzx+POhv%C1yDuQNjI7!RXuM^kDr|HN2p>%a=1eGQR(34sxXw|eM!tLJ4 zWOwgVLXvl+&`U3t-q$(L!afCnR=1(xQCLprz1kt{fBTpW(so6Q<2P9G$6Z3zv3>O5 zlUN%4?G$}1&Y|Tx?~>k!j?u6uDKt9l43(@6p${Gg)2^xzGUak0y(T?HpYs|-|M6mC z;rfa$Q7>V)by8{G#!kV$brYZX;{-i+?ErNhca^-kau5eUiKh(@$FP5+2GEQrj&zfb zE}fSDnuIE7(;3~O&*tM)I?s9>Rmn0Y1Ez};z%vu*=~sQJ_km%Q+dhsO{+x~v8>CeC z#VlI=!I+<5;7nVz<_M>)b?8513wonZj&R7vgw8fsq}(DyI@_r)ZTGh#11=Axt3t-n zm$&l>x5Y>Ru%dhZy3q8IiR^a!8{FG5j)ql_rmyE2(=YSwY2OFK>GH4?a`io>WfKjB zLf4se(z86`@@))#>S{n2M>x}{HN%Lt;UKzf_&1sQJ`<`wW)5x2oW_#U2UD#!Ju2y| zLH9P9Q-^D!|6rTBpt9ij=GEsv?BG?@#y6 zu_v-j58?H*GNw1zfx5_#qHhAE^heGnYHU4)*_;_jXEdwQ24X~)SW_CldKis8=1g@u zG--a~Wa{l^B3vDIgUoX3L)|=YvVoR{G@>+$rEuHf`jwINzqq7 z(*?F=<`y=3uo=sCl4rjPvM@Pp9Tvb6=BY4_d$emNW-keZPLq}FQ1*SisxxnmOAw!Y%_4eMUV1PHN=(stJ{e;yzcSt zM^e}+ugQFHhXXp+>5y&J(?l1l=sESV#cAzFh>8A2XkL_I*JQ6=GQ#}>T1ywO%$qOy zpo8yOPGB@n`C7_eUOIp#dE-gXj0r5z*O-mbti*ker(j{(M_GqwC1w|G##>PpDDic} z-ir;GWD-fmT%2GPWH!=Um8OCS_=qQ`OcPRSYzv-Mw}(OMg}~-z>O$SFTGpQ zNPZvq1pZwQh^@~-@K*W-uD7R?nmx~CyVo(iel1xxO?xBB@AM;y%9VC<*Q)WoMG<$% zZX&DJx`eZ~OX%p!T^Q7I8!z<<70S5%Y;<)c_;gY5d3jm7*Cj-FcTtN|n%ydNzd(fW z`)om*KEiZO+@UJZwtt<(dV7dj>VNfk_x2mq^VDNocG|Q2b1uwZ_5{n8DDpkJ&dg@a zWOm?q3JypntWjjpCJnG7;dKjHo=+e?j$Q?en@q4UF$n9@G}*Os1Dq57oa`67{CRJ! 
zL>{7=|E80wQnHOMTW3Jb2uAubR~8Wd7#U6U2Cg<5qOCwd(ReX=J@=QQG2Sr)tWN8}=vZOHv1BR2T&d}gMbNFI$Z z!k5j9*>yJ;Rv4AWYLvCvh<_HuAy~$qxTT4Eg)2m*^B_y_Gl^}OBV~)TqnK%YDBF}R zWln#6*o0X-LDTCjd(bzGsod-)8g5~%m?kpt^+(4P#2w3f2=p5 z>7ko2t^6Mr%q`-b=M`d#&L7?<{2dB>2*xc*MMo1`_&e?y zeDVRUmZ*}rKq;n7{RL}_FF}=d70ieh@4z*-Sw-+^u?O&v@j|S+^8}vI4^XyVLgMR+xayw!2k^{`N+L>+Aeak{~=ag=$vta?hYGIwhuCSc3x7sQ>^5vV zO`iK5W1rW|V<7{!6NiOSC<*gtQL`&pMEWYWI3$YoI&W0EbdR;r_x1&r)T@zXO*z2w zuViCpX&GDO_KwMQEM&v=9ZSFOe#1Twtj5B3j~SVBl(if97s z3CLu9yWZfY1J~i{?OxRGn~Y4!sbm^suhX>(@xt|rMAB7rQTThHisHZgs*qrRQw%>JMzG_C0nb{wOv0SC8TCN@s%UA9g(6A$wG9;hUFQ21 z^`O7|57auWPGUDYLD{da3hMR5GT!I0BO zk+r#;<>ubM36=pHc$40)z?o};aoaPPafS!YKP$MTsmEbi&H>p`*fNPS#kot$o~`_+siQU z>vLIaw;~S9I1gKx^@MoICd_7=L;X%Lh+FCUL*t7G24%1P7N>(?u>^3))h zm~6nbV+M1jZkhOCmOQhJw`W-;O6 !xswBeIdG!<4 zaEZ#Z8|mtdQ+mcf^5_d1yblgL&Ow(_G2ZfjY1ia4oh=%=8SpovqmmoyMf7HDf)`t) z(gR1&eZUL5Zpf4$U6N)lFkt%2joCH9m<(T_%pNlFp3_zvm^nZb9u-HTF^l9cmAIf+ zAdmlu3|=YNz{4fiA$d+OT;VZ}c&#+Y4ar^*qqDKZue};Oza+r&$TR4S)@*m%R#=_> zms8a3;T66~$W*o(l5Y*b_fymHXqPU1pj#3V9@z9+?V?l~w-IU?TS(}@!n=&_AHI;>^+NYt_2$TsCh zu^{=;Y;oHbwpULMhXilr4HOTtT-&{DS-Bh8S}~B_DiPq)g0-yPLmiIfG|N7G7{Z>W z?qVKdW^Gv^_TRr3@jANN?9Ke0csuDkcc@gCjM+YfJ$&{CC38|aonO63=yfw3?y{9V zQJ#R_TgTz&$*Z{Rn-4^&1a&7;`ro>*q-a}TTR#k&N)i+AX#H$b@54}36~y|Xib zb54t3$4@U@5j%*<``M7x8W*zrEl2K1(@EKQN|^LDfa@3Zd$Oj zqc36k5<50|&qB6;?Qjfwx&^+_cN=<$=Lg^4ZD{R3wvshVv_r8X73gUrQ`ZY z6;40pPRe;QrzROD#Z5*1u_xJ*|CCu*_*tBb$zcD*K$@~J6Oz8ELqJXh6hyZ|M#31F z)c-nYKHbj6-$@3);4`S-bq=>m#qaN|f&8mGGSG9n2yF-I(6jO`Jav&HiM7G7Or{UM zk4JMlKK3AqO13o(m4VNuleqA+gq_dPd9;sZoCO8ug5u%M|oDV-fFyO-Y=+1Os} zjIx}_%Jsq0Ew{Kuz0>*9L6c;E@Eh;!M?-V6RbuIal<-xceWJ z8uY~vZ{9*qgM>Ya71_nTg%F&qfk_*mLV3+YzHMoLn6`H*jEg#fjobHQ;JP>*e7!$D z*^>kQKgL4Zz~d0#ubB4@o(55d3(?gt3}dAL^%1YRY1>buqiPGUdHFZo+wd3n^~!^? 
z(YL^)>McZlIu0R!IMCJYL4(|Cp1f2>KBf%+HavrsBd=jf&I74rMyc$hraEzQ{e_a= zzoo?woY2)mfmrHYfMxV2-&Xh?^v4&YME;{x?}wOw>${3SKLSg74G=q>p}ge39T@Hs z3S-Ck;?^~h@Qcd@|N3?Oz3q=7d~^i(J{^Mh;xE9Dxq)aHejMdWtg)`YI;lM|1FEu4 zL)+d4czEU+pR?E(dQMkktC$O4`nv{w-yGxJ&;Nj9b&ufc*;HuWx)g4{u!RD@T5!J- ziESSW`T8@9p#D~a^rnTFZ}+>$xu;y?Iy~|qYe4T@m{rkAMjd#IaV>Oq1U%Xd3NCD@S zDB`kcDR}g`iOW)kb4L6Ba5}Ss_~zq(VM(4UiMsX(+oo{b9C?@qrMU%BQna0P!W=% zRK|VIm1HChDU_sADMDJ(`rY6EUS8MB9p^dcbKY+cPO48}HC&ucKjto>s@e7=;Pf4m zSFxEB>6IgOX^l*Ahb$^AHlY%>wdC)y4RmN_HW^4eLTr2!E411lGDDiv=nLH{>X`AE zGYW~PZo%hCWr1A9IV>SLwTX1u8^PDvmP-zqD`NQ{HENTyiahidayDwRv|_&u_HC>r zA&>Hi+j;}qTHs6{l1-%bn;2wf$z!{)yKOq*9<{sph-NvjpaC1M3;)v#bYOE5{rdD6 zIek(TL}uwS4=q%<%AsoOl+^dkf!X4)_H#WEIlh?&#blLdx2z^AUy3<*feT|YQiB+k z94A4;14Jc6j-1d;CXQo;?8UOZMARsV`t_ZsTX-cB;&z{|m9Y@c13^M24z2Gm6~|q< zY205CRl4QfRuXM4L#zxsI4zreqR{V2Y^7x3p2Q$M7512Taz)_Z5A!1@3lt3aK>w zLb}d1a!XTxk}g_F+ULs>$Czqjm#u&@k;k|VZTlHHBT+0pHI76~>St8FG>0DfBxtQog@pA<+N&EyD(kG7=$oOW%SXu7&E~nPcXh%UL>!ZXjas5u$Vsm zeTP2W0}Nxgi!Pl0liExduBX0}k@R~>WgSPcxKWWUQ2Ymns{g|F89DIs$4pi}ZzOBb zKZ7k@F;3`-PlNdnCE12QW7*FzgY|WkW2+5Ev!~uGRnB_V2LpFTumvKTbh?ecK{PQtu`RT{U@ROXhY$_LjW0!23&u903 z$Eo-Gq5k#^)>_Pv9op@L!M{27{KJ{->0f{0so+}WQHOP`n#PXP5wQ`~l4czjj$&`L zFX111IrF2(EgnXwn?sZBr>>hwuo z>m{N!^ev~?n%=vkvhb#^c30BfaKKkRGMDZLxMC4xmLqZq-oqIYO#0}4JsJR zDBLojADelaa5$Uj+nymo)o&^e4=2%m?Z>#l-2do+MG@z?tCtg5Kb{<(_k+sB9H#Gu z{Oa471X|8jQ<1ZendQDoRAyKTRdW={uFEn^cV;|!J*AZ16&yFs-}=dunPIejPA>KT zHOM8rmx4V7ab)eZwgiRPWG{<8`>8 zx4)THXJ2!E0TS?hFTnhxT40dd%FKD-0FQQWByBg35Lxp;=y%jYU$1bI@;He+at$Jn zXStFpSC(j`c%tXmaM(U-H0bRXcCVC;U{F*6msWTJH&+C|7N-fj9wUL5TmzW`^E3ZO zwsk`IXj-7V4#FBkVNXXdCob?64i_nt%BdkRQ>mJpntFkH4elZ%{Y4?wSzsw$+YD@? 
zJL9{yo!-xu1E~mCeB3*R)BR(KPp&>9XJRIT%LKvQ+850&HoeR2nO{wdv;Fx_rR74gR%g`JF0 z##_@Ks}@g36^ol>ZI2vQo=fLk?n$BY!K1M2%?WbgLmc_{^)|7%lutS{j)JIg9(EMC zZ_gG>LXD^gT=jTFd&1X1)>?mfT$e?6cuIqYxHIZ(c{QtKnk}Tv4#p z;pORtsPyg&#*E9sZ5g}qVb5tIf4v6ZC)~x1pI!n(?;YOL7)7m3v(f*{RIX1*cMd)`vo1BbyCnjJ%JDGL<=8F$al1b35 zFBm#rUX}4QJ<_cnw`jJjFcui|<^vNpUGICVyIt^@_ zO%A8DQuE?t)*PWk4+zy%Fe7nEmxz34H!WM9PXk?!(N}Y7E3(%0FnQ%-AQ3c?xY!*l ze>WgaQ+I_hYjXDqGnIw(;=vxqwCEgBAJ!B6v_5q0y-UPiU?KTR@2AxRtEh?g6Y|pT z5A$H!OVX+IfeA9ZO`V1F+(fX$4VuR*7T8A+n@U$}3(qEEBX*ot%1Oh3*9n59LN;3I zmyo3hBwMV;Qdlg4k!yusMBp9zWUY|L?(ZWu?aRo+ofV9h%TGo_(Us=CF(wpOA+c}!x>~PI+O1QGwH)0 zcNyO^dbIWTHAYL@j7tj3BpbX_tb?=-xbz@xQZVg2jXLn2&K)C%KZ3$&v$nt$mrJEn zmmi|7+xp3+wqDYe`hvXd{6d%WPw8wA1r)n11uMG4N#68h)THSrQF-}?R80$}Sx?RB zcC%<|vhfa$sGUN${CdfFj;rJbq(q_n=nY0>#1$^?dnMCZ*hVXT4sp$f;zCVf#pyj8 zM`t^Q(u|90^kQ%y%?n6p?i$vTpdX)@4_)u*1iPxIv>}QW+yb|EMLPl*J8v zwIK`l#~#L)S(ccq9E116FQV~{1DH50cvG8FFn6pz)^9$EDN-kK(Op?|@l`|ZQ?+RB zn}N^m!|2DEQW&-V1bp5)8V^jaqDj3@G|fE}TcVC*<`Nk!l|7D%b-nbxFq;^>n1VZ# z6|v~Fv+&-yhFO^w=uue+_19u)jJ&|^ES-Xt|9F&VD6T#-Om79|)4Tg`pqB8R;S|`NeSJTd~6hEqzB{I z_#FIKGMZKUJ%jygS^{@OszF%K#c7f@7^))7d6+X+-WM)YU71O+F5oO}*3$%Xb1@wH zz6hi%ywKzINqn+57?l$52s_=yc;e|rJe1>tgXs?V?S~dly_$?66OUqQkUqYzsls4- z3?ze9(ErS7yp~xF?G~dk?msJR+}}*=hgY#`qs?(uW&w6Bm8C72=dtywEEvR;&_UTJ z6*hZE!FWbMnx~sd%9f+tvBoE)SmiQ3=cNV1LeF2pP@3Mc{KRZJE`uuz zjNxEr8TU=xi%|}YV747mBySgsgNSx288bKsXE)_C36n0n1Q-X=J;9z*= zD8meGUB!tN3$;wH0jIHgG?S(w3*~A*$=C0vn2bal93Yy^y7O1KiE81bcau9FGuNhr zlOky31`)_qoq&&*+hg2-1?qo4O_xu0!)X->l*`#jJH+JinUy%<#(Gng(V{roxZanYDe@=Vj7s=JtzZ`;PuyVC7sNt^;admx61wSm+=F@-Ms z@I`RI9->i|zE*`j&&rp^UMHBo2sGRr(D}!liX!JWQe-@Zi?ytr^oJat!6Xv?6HQqs`FtXR)=xk9_9lx3&0dtu$Wj>7mov3hr;45^)p0Sp{29(JOPC zxGzz2DD6_m-U=h@qQ0l}-QEl)wOX5Y_*+qpVJCW^+Ksc$3L|D4%jv!k(@Bia0AU7F zNWqc+XA)(shxL@eM*S?a_ls0{yk-p1QdYxAx*2aio=Ieuy(Nn`jt7mz4(@5}TdK2Y z35NDGGtZJHk)*d8bfj?`<9lE###X3Nr2#QWHb^CGphiVuiVJq`TSh#Uc2W1b6}0Ka zdgk@ixpYuMicXJ;qI#98Bzw#w@cUEwgWPcBnqMi#Ui4%-j?g`?#2@qzo9E&bJ z0`W~*pcYU9w?=iqjQXo!W#Nx0Yh$64^Zb|a 
zVfh|->G}X7B?}?1d<>YEEC3Q444Na32;4YBn$5(3{Ox4;dF}x&wNT*dk95Godxbzp zdV}nv`B0v)9VfkU6IiUBAQrZTF}>FTKkht%f18A!vgB6yWm^yRd#=#n(}&^UJmH)2 z^D-O}7!jV0vD}_B!gC{g7p}UUgk1SuprZH~#9}6pf7)GWxU&g0&+oz{*9K6SJ`)ef zJ5o!>9fJF`7L@DvW3*%~h_s!Zqffvrxq6Ve?G8= zRx{nh=ZX2w8hSwK7G3!2A3eD86P3x!CGUm>*Rkr)vLN%a3aznQiB0oHMnZcm(VBbI zTBBTs?BDT&2F-bPX<$5ELl?ey!?f3!LL4>_ZOThs)usv?cdz6c^qb3lK>NZ6q70WUr;fMvRsxOiDCSl?a*@18``f1P&F z|8Whjyrl~>YBTA-yW(g#DjmM$tO3R%7lMx&fX4G+bU9lBQ#`alI&V8&AD@N={5Ysv z{Es|bcnf#E?q>?;@{r>^3TBpBf~$g);HbMsWnZb%M?QW~t!0Mo>h@4*Z!OFfj6lk9 zH~f1&1KMqOp#Qo^SmAVsTPsos8BNi!%~+epn_U+eXsJM*H2`yYc$snr?q*2hD-Ri} zDa=KT-ld`gg&$&6I(IgPI2;8n)0?1V zzZT54J|H{gEb-FHbs)U#A!FAa0L@gWSc7oy?QW*+BEpM<6`=Hf3jKNMCY?F>j*DAc z%UM2~4yj7{B+F(Qvplq$erT70?3;jzCq=RCdnlw6RU$H^MgGY|(ToQrq&0FA*(WKS z@~s4(I_pHrM~k4%x;E?o_cBYrc#(8Pp?9CNj}tM7=Dczq zlI8`1e|mHbUH-6|Ox&YGyC&w*(A1M;?*3%b*dc;(j#^B~X`YCU8BZ-PW{~J&6GDD< zQNstK=r!&&ox1unEnR+(QS+073Piyh-gjiU|jpdX@a3J^NooW=BFbWx1El3=^|AwagG9(#6IVA4R6w+DLc95 zPwhmt?=|ygLoglrBcI&eD+zmq9TgR;G-@)+fs0y|EaZ19s7OI4Wv-agX`5dWnMKpc zwCf@C;Rz}5vHwK`j1A`#A%Y@*IJ&D*mm{@7w5V@9!(p%@KgDxBD(y4f}>}%;LI6Xr^rj|X!2XYkiY7!&sXm==G_hERR;1h zyzY`8sNI>(uiw^z4$nvMt7{GTWxLIZvaKOr>7UN;-mlCL{~XQRiTx9tIGSv#tscKA zX%erl%Ch@kMX(L!5`2f-c)se|9Ja_`mQjmV;^9;mhX2y%YgSF+WenExks=EGjn~uo zAFfka|JZNBxwoG${;R`hH0kntbNle}DM@xk<5*r_*b^RYHk!9MBgvPoT7X(g68v1$ z%V_rzqMW>5D9Je61Y}d-DA;tlN`v ztw(W(wg)vjwuQPFHKEyrKH4--AJ2!ikaA@cy8C`NL^mklIrFnLzUBdJe|7}5KMH(% z!+EGb!3;-zT>#%Utf!h&_EYQAL2O0Q0w_9nhqL>z3#aY>NyaLTqbm<8pil5_Z2Yno zRYZ5y)Sq9EoqqiP)K5LC;(5!S;#8T$7I`xP_>rzvWWmr7wyR z#_E;zIbpJ>Xm$K$ox{h}$tKIc@Rt8Y*fCYpU(T8c?i zp5mUz3@fEY$)HgxnYOb7i(`a5o3=gt8FQYSS8<8D=#)xg>kDFNlkgyMR*SU+Bofb?mWK zI;ec%iM5tQr7%N##_8-l%HErEAI1fJ0wzgtK7O=fbGEL4b5_fs=EzN0zPSzPuq!*e zG6yaWpM+y$o`6nI2Fy;r59=J<`G%gCVDV)ojO`=r4$D07+%Im!skg#jpS1uHQCz#v zLTCz!gLU=7{^f*d@UkDlcQ-GBl*SflpAbfk>$Au)?Ns=g+X;4Qv6xccNUT=fgCzU= zu%3Gig;uWxE}}LoQh1*2Hhc%;K2V$>)F`)>yhB=X6(SdG2b-ITFeL57M*MpVB2)b< z6b8>hzRh09Ptl>tKjUG=y)-CQ>V}`)F))=00X;iA-uU5fR_cmyd(CLWDm@2iUbmvs 
z)U^Yf6W7DJdOKdsX$8)ie;+@6ZiPvXQ`qCd%W$3hLKuk3f#rYJ!A=*JZE$c06WL;z zKT%*zSYLt(weLY@v=5w!Udva-hQq7R+W?P^{lok%8K&39?hxwMlQiS) zUThZ`#aHB&&}Y&kFtBA0*ZX)UwlTstPV*IAcws#nOb)~&Rg&=j=SSMrVNTU;M^Src z2NGB}j=VlM9lso&g0e+DwD`+ksx+(uWAwXe{P)jPMd}o~*lvYMDo3bKT|CZD%O@%2 zCS+N=87kdAhAN%Iv_k1CEwuiFY~5Ul?p%N!WfQ@Ddk{uOOonHlw2|2!f{rhCq06Kj zILUt_N^TItkBl-t+GT`?-<(73>;)*QD1jl{Tj=EILYj5>3(fd;kUJb{gR>THLefz} z&V`HNzYG~-nUscKJQ_jl^B@i_Y$6ZtF2?li34C|BC5|ukhGffaICAH4wAi^5?UqgB z2PfTw{3Z{4*%OA*o`jP;n?&y?BWE3b885nr;nxensA_(nzS?KV`YEYlZb~p>!EtmM zIL6z^thXLKJ&i9iHo+4b0RHb!F}oYrQJQwVd``)Hw0|~0w-pO)lAGU|4Vzcc(W|%M znU5;O{Av{4|8FB0)UTtXE|^mX*<5P!^*xR~Z>8}@dVvUIEkppxcIKs?$MV$Uw6b7>2S{JC! z#NW;4=#eoWly2M=m`1I1gQYL^UG9nNW`2dZ{!O^Rb1hCE7=<;IqgQmf3O_R~&}sD~ zf3C#CSg+oSqpJ;wc1{k>aJmh$_muFT;}$qlF&?so{YVcxNl+2$xq{yxxcI^h8X8l< zEf%;PlOOp(ZG$FsZi}R2i%l{5* zAvk|NX`Zl!`o%7RO^ShZUBDtR_-TyuuXNJEnhY{C;3-{Ye1_3#C?k1ZvP8LPGHuZMDm&9h?b^d;g@^kJCk+<(=2!@?x8 zsa_8K6TCT-f$`Wrn7(Qb897M|Z6r-N zaq^574?m*0m13B8zm>6Th@-6zf=8etj@VECMJ*rwqxOz#NJ{ZLqHH7!wl@w_x1K9x z!2UcfzD76$Lc9zoYcfS zBr|gsoef!}#a-~C9S&YAbdM`GVinG2s>Ij*|o> zqeWmGDvevl^pHCH$MpACS@?I^6ptSiLG}6LQ0>Gp-F0*xcXx>!{bsIAHRackH|{6s zmyHBhKFa5^!&W2P-(YR>R+E&3yOypTzDR?V|1k%*3S7KKi8ST&JUBw!hIJ=k9j$3Cm{#b^^>UH!ZfZc-x1`Nk7Em*BdA!Q zFLXbeS#izG4fNknhW2~oDle~0C&$)kL-)i2^7i5^E`72&dw--mQxoifI`-T7+3Q!9 zKV>ao!(vLb556WT`%d#urM3c!@V?cBN3*zo_l5BNQWQv74 z%2~3Gxtk&fZ{F``#l6dU)dNAW@RbqFO*MqGm1~Il&3&+Tga#bzk_C|+zj(`EZq0s*tnwD47slKnI_uPzfs~Hu78}xh?I*$jt5Me~N^YuOTJ) zPfUWbxvWN>*VKah=Tw^Sm_|NW$$@R6I{j5?D=<)BQS;tVYt~zgz4&fAZjMRh&U6{W zhBqZd)~bP|%n=3Nw2_Y4m_^6lm`*!e1ZKCwI==gx@IDGN0(tc@V4qpWzb+3ZQhV#E z#u;-ed-@?CKY0Sv6_w33s4ukse%_DTmzwccPsfm5JLI_=Zu>YXZ7H(3Pnb1~5rVP_ zL3}*@kJN_R%+3r_gHFd-T%{X?rRt{e&({@eOtXlEiU)6F@QA-Et4F59U*nEfOK{#E z8RSQ@4AuYd1%!pTn#MuLwEP|nd9K7cX5PRQ zy6}(1P61xaf-Tz3khH7~_DWq5cofrUNY)17KFEX;=Tc~X{|QQDGvNBOB1ktp4}OlH z!A~?4KNfVr8JjamU+qAhuPLzTRtTNBWErlwVZxtUlZ+m}wxN|oRe5ei0;t)%gMIN$ 
z^zo&8@L%l*u(YfO<@`kEadJEx7I*`0$zO)UrV{O7{yW=JMRyF{+Gc>@;FA<~aE`Ck%cz){~>!*NLmG5=q&;5nj6~RD^6(X6wCn zkjCGIWcrhSY!c>lMe^n7XxBw_4xXm70|-{VPUMQ7Zy>Ap%oaG8V_^8zW}J0?H+j^) z5Jr|q^L?%fuqEg?NtV{8E8O#`w%#);`n!UJJzWPHi;PHh-U2vRlg5XKMA0JWg`B;w zHM4Kv6p0XD2IFLB!dH zPn_~}JEm@lCr2;lkvE5(q3P*92sLt#B2_+c(n%>+hn(fgP~;NFcQB6~*~=3D)_M zi_zF66x2Pg!-S#j;H|!vZ|^rWssE zZ3oe<+R!^v7sM~Wfj|Q}SR=k1@^+WPQ}wNI+5ZPo?%qzHeNcs`X4@gn$CzKaP!;x+ zKPKlwmcX*UL-3=?6Xt2^!lK^KFzV9-xE51SZ@v{gHtTe8M7SeN`sNRGYZJ<){N#*3 zN1^4U-FWtiBh+^traK<~By|`2iK;PynmkFi@=Q2{MxTIt-Z$Ywxid8U$b#433t{r_ z+2qM*12Ehk25Fvd81b(k5?ZBE`%M_17 z0BzUc=i1%yE?OSbq!QtUwj94E#}U-#r;<9kHJ~r-)x_J6gO=}|Gr7wk z49*SMCC0F+japANr}{kR8Z@uvUW8OLtQ$J=2m}v3TS$VfRyy zUw%CU#E+}9|EV?7x3Mm(8(4~ifo0??Zw!sbL11vcmsEax0#^j*`L<6Q;4{|} zF2)!^c&ZN(&ueAUX+E4;7XqmxBjNNw9!A_YBi5lZ?DrFPjA^UDy-s+6DWCRP5B2UM z-cEZVYQH}}(m0UOpP~YX`aG#>R_JUQLw#C#Sc6(ux8bWnEk;?W33D`Dp<2C>+v)q0 z7#bgE4$n;^*URn_>N~1@QQ3HA^7uEn(RdO_uWKivGe)vw#kEPJz~xOHlgK@_evSTX z^0|Ot(L`nUTavl!MupvcYna|_iPu-zQddh8Zb6_oX;!~MHpN)N_Y5nT`soR$5r-A_{awi=WOdQ7e+ogjZaeQ1DQGO4*b z2^;1;M%$D#L}Hy0_gtrt_z8)z8_8?o@=Xy|xpNI;dSE<_&rZeKg=1k}_zik3Uya#j ze1o$th$1o%qp7h)BG{VoOhNTVV)Z+W>gH%d)XEKP?!BWB7*GYSgCS7j|A@nUEr|1~ zg!1qf(AgIYFGsPotmY;tnAd=hY!cjDbr%#%g5axP9ux?c7r{ z{>8PBdsCcNW*hOPKdysQ+;-T!fxwG~XuzX$*$3yJf|7|fl!@NNCJ{-J*qRAxn|4CF zpATw9U+0`o-Gmf@k>#}nz`dsqjKWk{H@ElTFu;Ikeg?k0eH&75A7hH!Bfu++gU-F~ z;4JX14yCS#>Kp&aMZwK6XKo5ap$#8wqy^(5kHXujwIFA63wI<$tF`i|=`yW5a>u?F1N7(}uZI z8hIt%+9~(aH!ehMZDmnPufgb)= zMRKm5C8Jjfee-nl3eQ7}Dtw)G;_iHNenoi^pEg>IU1@l_!X+;RDqg;W-rY)2b>|eb z5|`6?GS;9~ziM{Jh{*DZIuAgkFp-2Rt`K?-dl{p}BZ#$E3pw}jC(IpVK;BMpp&>DO z_-s=wEb^O49GelR1hQo1t9;fW{v_#L^dH7OizFojBdLwIJ~_Pa2bo#)7)-;LP_BD` zSij>5^Ccb(j8*W$s4tB7vJu>=a#J#Ivkr}ny~a764Wi2v-;m^ZCHiRDN&3jfo!`3k z2}zUNL<0lv@-trf(2D`nnG5Q{*V`8Q(G!47LL12!o7DechtLy&rC6V;UZ03 zHYreVsYLYuX36fqsm;9FsUz&=ClZD6?sU4RE_rEmpT_qG5rF)C0E|EgVMn&{5~RxzEx2sZ64at_g54iSf$(@r~+ zC)3h07!v){hj!SUCD|3yu55;wt9|1#1G#zq%$<6G^?igVJmVzUy=T`-oP 
zbN(~AVkYEDUW5>{-ixqiV}iiq}$I8YKin6`~Eko7wU4w4?SFdjWY*tYrX$NYEdAlbQ<#73Qgy=)XUy_^AG(oRxL<`rL&AVv zJSG!o-VTGl5P{)PkxTaWEQ0md+R2fsM${R{6U&d|!9RE}iP<<06Zsh=PEMcI%bX$P zzIFMxBhQe_Memu_cPD_!=rY0KDUXJp=V9oz9-iAiMCW8Zq^t5=(e&IKpshMs`q_(q z@p(ZPo)1OMUz_>sNy#WZWdvS5tAHZkrlHQ9cyw3I2gL`fbfb|xF3geTb1XOE&&R*9 z?zA%=n=G(smS~amRnjOfeiGl)Se)hkkgC5H+(a2CaFntm_OG7G&hyh`mDsnmV}BCL z1T#oCy7kwP|g72;S

9}7NmN#I%0%U{fvRdqTtD z;gws|d%zbev*zG}a7|YIs~Qw{3Y_caIB>0s2C2emK5ADE$c^=YHl2z539o}}<2)O( zWUM5}eI1M02AbrcMi?|ZNJHiWfH{|sgNf!G5L5C6%dfAi{V@kH-AbodfRBp>&3t2MXLKMKik zYhWk%cV8u6CRp<76D3&}>AjG=G*{s4?jQrv1;lpO7ARK8g$bil$e*smu*P-3+Gx4~ zZ~NjbRX_cc*S0l)sXsPgwYWZ=@_9Q(+;xF5PSLPwzc!6~dlmvNAE1iyE!e7BfR#I+ z;PxYhIDPmXde&XV`%_P%toUhsM8>kd7u!&k?MAWlf6!Hb824SzLdP8rtj!y7exs}m zf8u94K5`e~QzxI~z3#^%dsG1ps*mFZw|dMhyo?9_E5~ga4{)s32E4Q6C^`wx*HI5U z-03y|E#;NiF}Dr#5*md(z+GDZbO5hU>qVzK?=i1dl5YzwhPGN;wkG>1inSHMm2e@*s(AdI@H?HOrZ_Z0w&U<^X|dC7vov)Np0|B(owkj ztdO2we+E~4Nx;pT2{?69F2+}i@cJF+@peN2eiX?;6{qu9@aF|?AMr!T*WW?%vl3U+ zebD+~h}@k&fz8Vs&xZH+;K6ifcx7Ho)zS8jwvDeI$7QyS8lekO{tm zyMp2jUq zqv1OoFeT?YNAmko5VgZhu^{kRM_`9kDoyt22A^BAS*c@hu*_i=%ov_Wyq&9rGjBCi zSO$aj2|eMtT|u=zNO1jAo4ASU(L{V}o4^6q=iml6Qe&1RmU4qx$Ad504t^$Wl zhi&Yh4mu%!NzaFQ@FDFw5kI{dPBgf{zH1)vH0V4T+OieY1`}bHl?t>3DnN?gTVi)X z1_BdQSeuV~;77v^GXD5mUiHdHvaRtJRKzF%bK?O~vaiI9PktnTQGw5PlfZV#NAfXH zU|cRW1&6BDWc`^@vrdc_=S|v#-~D(#csMM_E|qE;k!1%hb0cAEkTN6>+rWR4#gOD- zh!d~*Lf*DR*wr0Ktd=E%`3+U@y%!5t7V5+C>%TxMYNpUz9wu6z^T6kE1gt#xmYS>* zg9U!MCPPRGplP(0<}%8EAUQ^$3Ou_w}kS|#14A-0L^s6Y7vV?+g~>+PgL z=EbzfJRX0Wgz+ZAS#9=Lz^R`O@>>>(q4muT{3xS!m_Fq#vqpUsJ8o$iRn|(P(u>DdY1?pw>w*~EL4Lv8E)Z<3lFhvn$7f) zq$)o!c!m-GS4I5{?VwwCE_^#6)RVKUVCA?zxUr&y=#@7A46*^>i0*&nAi76$JjE)Iw%ILbiOlXw*RB( zyyLn0zc@}-HmQu#LR6x%x$kpTl*)?8O3~1iv`~>vR+*7XkrEnY#C@Nuh0q{MODTQZ zlm_kJ{r$@yp9i1M=Y8MjoY#3iZ<6dl71B`S4G|$;r1y3JjDJVario*=$nOO8D&vL} zLqp!yVJ`bzte!pocs5in=|kTeUF5imwlFJ=1=X#Ayxg8=bS}ihu5CByv(w2aIU@+a zuA7KgFX^+JNFnOo@kdneqIX*iaglxovv%4JZ1`M^zivIFp$}eB`YIADtu&xhCL0w! 
zXCf_W5ar$O_}uEu7}`$Y(GOzXQ7&D3%PD_ z6uU)JAgw#SskCl z*9*PAq+i!*w{;e6dSilJLC&;6$fn5*72~=+x!7O-k1kad7_?V}yZxGHBR$UnyARdUxQ6#+;8r1>cKtQC zm*|@|lzAbxH-vlOGxX zawOl_6GLu}w4_O2n@QKHbh`W71ZtFSFFd>7kYNv=DAIIdUDZrf!+ubM!&E=+lFJ_PjZqeBf04X2K=^(BVl}7HlO`*8|jJC=NBnVC&l~y zN$$zfTAoQoT)TEMn2fPSxt%+#Y!8VyJlOXIQYZJ*l#AzR)|6DZuv`SUs|H}vld1Lo zdBTkU^%GhlItHedX_B8?r{jj6YjD@dkcK>KA^2w&7HX}*#|yUMimfS_*r~;p`6bi8 zPgbLOLm~Y#w*`l-duc#SEMw(63McnWz`|cvG_&9}?6RxHubWrHA)g9ZlXIG`{XCLB zTeJ@o$BD4U^Ie$(FDl^jjzwhbpd(R^dIg!Ug`Kh10`AbAOY}{&IR?l0n+dTm4UF+ zcI3;I3y|zH2K5U%!BpU)uWI;3yPBWkcIRx^<|PMzHFCkq_#$R$Btn6=@Z6sg4Qu{8 zL$r@U7s|ISasaWph0YRDz*n`;?kQ1ap z{@fqHF;9zNOy_lh^D+)#p%`30JcqnjTn!!rN65hiXZZN9b`ZOF9&8)A2m<=_G33I0 z@=SPbKKLkH$*3SAjtTJSTPmtn{WQ`sH~fx zv2GJw?-m!_DKU_B@f?)=7TA|z49J+fz=nPgsJ^ZaL+R7NV~H~=<0|$;P8D@p8$+!+ zcY=E!@G3T=*@?4f@Rmt$dzzF&9YN(sWjQ zh7mO9m_hMi4rpi}g_q*S@Z`m5c(3#swKFHfv|mYZH_u$)n#=R|W6f!HNhoVla~O0l z35pc$KKelK2s_5f5B7*$;(cE|Cj;|z;k<=A=qaBQcBXPjq}U{!`7#OZzt(fFw;du1 z4(sUbPd}+yL84XD`fSQtvpDHY4^7yRXEk}jC|oQuf-RAmf~{}bxuzqJ;dQ_)I^~`Y z`|;U9GFQKjO;ncv<4r1 zO}hlne}H)5qzNL8TAO z8YL5Y@F(y`0!slbx6+UGpJ}{0gHMwssq1?SoIl5byRtl(XI@U<|+CO^ogzeb&lvNe|7Lahoo`T8~A$BXFt3H+tExmH%TF zAP_qC z?~gFw;x%3kw!n{@W8ips9NNy-USzAF3p-DfK2E{*Z>hWO8Y4CPjeG`PNfPxfZ^ z(t&HG+@sqJRqeY@E4#MR67F}cPs^8b!EMEzBzuB^J84D1(R|6jcLuu zw?K}_GHxpfZmb^P&{8p$T{!*#*_S%R7fRiu>vnGhxmg0M@W>4Qu6#B<_4WvL{?o|F zEK|hVqDqoCbE}p5c!6WCs0mSVZv2H6dStt2nZS8mLtp5=Cfi;<0+ktNpG_I^4 z`LLV@{2IeMooyxgYHO+e>IiCZ^(|+ST1v&<3cDf>er$`<2tK*|8`m0jl0^K~WKF`& z`J+k;(J*y4t8~v7+TAy>=Do*p-;ZSUQ&~nnA2cBue($X|%LUMhjqz;85nylsEw4BE zCkKto)2VIkZ&FrK#eCD4z^;FIoqk-JMC%71^F}#g@cl#ybSOT+%EW#v7m~`qGi)Tw z6P~cSD+cMo78yQ-Tw#1qEu}|{7%Z(CkGK2&()@uW?Dd<^%C1VKe%6VYXHh{PXMCrI z&2|kBm$3NfDB7W@xoZ;ES8-G?c8p#l1-P!_!h(5b~~twszX^yW~6R`oYZ& z8B52q(gzH1Zl^6BT&Ia<{Rc5fVKhJMswgg~5qf#vhOFVmy+owxFa=jVw0N38?h09c z&D=2BvPI|xnETLyH<9RQW&s~=&2Fe&_LX)PS@7{io3XNR4R2TFf}d7Drx8M5OMcXO zzR^+@1NjE>=944JX?S2nPza7X{+*`fgrL5OA%5%;{-(?^+-LjWoW+X<7RJ7W^4=&! 
zdq3P1y`9~D4e_i+I&Zzal~2sg!#{<-=yn{Squ8v$Fg%?|NAAb_hm$e1^)&bDVlDst zR}t&=MI3+SeV}^{EU5eseT*=gjH%JZ?5M(a>iX_G4f?zlF7?e|MK2pSz&PWE3r^?h z3p8MZHkL55{X6-M`_6Ii4PwB~t&(|KaEEq>+wcxo6UYLoT&5w)h5T{&%#TqnXQVes zfwR*l_+oV?@>UnVZ5quR?A}h) zbB?jeXIe-|$Sr>Ks?)q;Ni?~YcAdB`*hjcoV(=kfp1pQc4n*g+kpA8?c!d#R?_Kc) z_iasN^|TYvXnKM;Ojt=WW_=;g3**UgftM4--r=+F>Na#&E0H~O?O3H~O(?r=z{|UD z z*kF-h)?;lft3GKd`>kynS)V_j*rh!O-{m74V$HnS__|?YNKN=-20DCGh8&w^hmf)< zm-uJ0%*t*vGVsudulweT`TL`|y)Sggy@wK{v~2=i*gOd~opyn+bJJn&?nlI`PoBQF z`NsF2nPaW-REmG*xftX}`9V@zDlO}=gWI};%mknF{3FRXbjH)~$ z3(Kp>?&dW6zA}#uR74G+_6|Lo!$_l5l*C*R*|OWv2=(&B#mCanBH)N$~@al z=4ED5`$ePj}C~QfQNB4FuWw+YWV0x zP&S%Cr2N0~vJQfS?zaqHl2U=o@4hmkPY==OmLJH>2{VbIUkCA6k+52kzj1>pKURxUGS2r`+m8+;)m!dF-b)Yj33w3kII0%1n zi7GjTL376eIAhvG&DWXG#_g^ocfK*6ltKQIT|PC|b|>rB)G^aF2WTyGmT@*`VA+&3 z^6}n1;x}?O{HWAtFV5M8PQuf3v%zid$)X*sw!#QJG5;0nOTVXy2b>!urhlTzAMTS$ zhod0ncmgi>T!j9wTIsqwq7Ct3&D8En67Cf8_rtA@_%+L%1-}%``D2dLH4kI3+ER3{ z&%uK`g6LHDDEfB90h;k8iC0oyh@bB-hB=~uH}}XBw}}Vg?YmW2Ri}-nPTTQ)&nLQ8 z-$!65w^I z4V&fQjJg#|;n1TTbclR}fs>ZuFAYll`ke68t=W8EYAt=zaFKjhlg8#x_XQ_Y9q6xf z!k15VaB09&lsjETRf^mM7Md2^DLlsqIwnxlr`H93%pCk?=R(}&ucPOgNNz#u7wleX z59hrl_~PObyxq5P?8rYJQ1fXMvdPDA)}sW}@fDcbTn&Z|D+n`yskmwGU8*5Jg<9GE zp~stdBfJVlmwk_DmUaaW-C4j_-g5)(;XYV1Z2|7@k7WB>)|1hy$DqUiFKtlG#pgZ` zIOQ3c_`M>Ve=hjA_pR{5^-8YTq|gcM^Obapt^~#>E8vEDZTxN8gVIYfaQc@*yjh-y z@&W_xP1p$R@J?cW$4^C3@pPKx&BEmKQYgFR8I3dvM%nryZ0>7^$mp9i_~LP_J=jgn zlHSn1D@SSR>t&d=ND3u?`r))c)6l*7Fh;FS!vC(>!t?$1)PB7Io{)FKvBR=xZAmHI z>%uwPHWSNrVt8v>o8SPo!Cmcsth9B7A5c^A#Tn~vEPQ`lV*7Wh_A znpOQ8#aL^+K-Y=C@!{sHvvK#nk z_y7)?p2WQg&HU!2*{I(XhIZlh>{*9n{1~S?Ot;X*QB$sgqkI-A$vcAXGf&~*>nZ43 zlZwf|&hwKUEs6HV5vZL10p{L!Mv=|u;MFI@4M(!@$UH6lceIHAH)awhdp{);BV+N* zfqIlle*;rnQn0XK4_=y6fDyr~Fg9NsCw1zwS|1Xqk?&Dh_3jz?EG@_WNy<>Y=Lskz zdO<~jkhK^RVZ-aR8hrl<9CiCDen#hKNa&2kIpb=O-JFP>&R_Y^XR=m$v2^5>PfW2mW z2?O_g(z~B~nO5Hnd^()TwnW>5RU!itmYQJsqY1>sRbZ#HHP~FaLgM4||L;d-n1zKz z)JGY{Z+7Q8r53|FYY|9Xx|0s%tHF%m2TV<@6YTW2B+FYOSOw{F;XEA%pOu^OZKy8k 
z9K#Xm%$e{p&lxt;{cv~6L!#TBPVSYNG`MNBQL}4X;LedLkhy3-z4YJ*>3-@Af95X- zw}?TZFDV8SFYTvs%WWDQZZ8q`sMau-eHVjeiae}c=?_&qZj=6d6NOo24oT(P--VyKJA1Jk|u9Pu7=dBli=8@X^_DE5b~jusJ)sx$jp3> zM-!BIQ|mVJ?UW1~I<|_2ZEAq%*ZP4p`C7=8@};HQ|B{b4wW0d`XRt0fL_Opb!Kc6mZrv^tW&%7ZZ`Q{b{IBIoP;NWUehi8C+PR5_Ne(qhx6Pw8kH2sQ)!EN z7`|){nJeszyjV60Gydx1pD$1A50tOKPxnRHgBB6==_5xb==TNQ+S>!?e|N#PT5q^N z{;%1`J}1FfV;a4mq--_e$0E8^7MWkcMX>mZGA902V%BJHBpMHsQGVfZ#?B=Q!_F@w zmuh?|*vz0w+GdJGba2@g2PgZpqw!i(j4ZXO@lb} z7@j4!Xcm!aJOXvU{5jQ~esH<<47vZUft=|rhIbG2KyriNM}3$9_d`WN%TF3#mBhnK zivXhk`y^}<2by>EFZb)gTQajZ7F;5Vpj2S&#APM`7j>K@%(8&uyEb5}eGpVvS-{Tu zC7`qE19cSkwM!pPg~;v6kiA<3_G~RD6F#NDQ+qkMzWx)DmR*5qH=|)ZnBdHEJ8&9& zO+Iw2hs2wM#PZl$2;JC4KeWX|V`C9asB_@YOxJ?CYhOwC<5<|NCD$hDByBWgTW&6R+7l|0UqydWMc$Y*nCO2+QLDvdN}Mm?hU_teRy$Ma~54k zv+iZ#Fe^R{M#fy?V-~nl^9SeA@$yNsTS=MMj*^2&r(#G+T*=5^vS8K^&wz`m6Y1M* zJ@#=|7G2z1PP*1zp?0_Q=mWUKasK7pWA}A*!saZxw&xmUE);XwQr!@iS4(etnJ_Vy z;>Q^Y~ zKPh;d+)mPE|BjKiL&u5R)(moPrxO`3F%r6mw79No84NY~Ogn|XTIJb;wBolOsXV%c zj%qf5M1N5*Kbis8ylNqAzX3e|lt4P3TVmU{6sj6{i99KIL3iwVODsQo;^h)acGXN> zy!IxWUf9o)m(c~ZVvz)+xFi~re`L^(v+|&rs|u6eXu+ZW82Y=YkVt=GaQuea`o*yl zkh`doR#%9j@x&#>B;Ydtesu$NT(ccLjP4WPeVsV6>;YYHbS)%JzfDI?siwA()wIEX zH4|JynW@iSlPmG(;Z4^#;A-{Y?AjO*`4IyJX(72%IEOD0y-lL~1Q)oF56E>dBZYrgk;L?u)VS@p z#nF~p^7W4vK2O{M!~XqbMCMoGIBy{Y-S=gl?9laxl@RjeJfq zgsY|lCJh6`Zn_B66}FqV9; zB_JVWpm#3mVkT*Cf|b&Vbl(Y2P<=jvBnf=^uk(W-RsAm3C+60dHHM;*D5klgV1Ycr0)Ik4W0>gTmX+jNiG%Fj=_!epJwfo;xC>bNnL6b6Ew8OIE<#j(TRx z*XN+QcLZ2(hyhHsrY^yD=&fgtqw~XY;sFP^qw|~V`C3Ype9YlcnIC*z+5?y3dTF(1 zDZ;0ORWM5T7N~=o-SzU0eds-;0El8cF`;@D<{5ARfk_Rsi##oBf%86){$n9zTDU*mUsGM*(&PlYUwkDdVlp76{+|3B z>w-HDO0l~xS^?wW2rBy@k%JAIxG(hwGclovq`JgHfp`Wc-0vY%9~>s3H!6sKpa}H) z$fK6OGMsO)0@XNY`1V#Dv_JKd!;WsWLde+U?h)mM?Pc;+QusW0<&g$>C}fb!LBhR; z&bcB3*wI1McdM}lQ>GF%xeJ`Z%@U^S&PwLYTpMzzYzo@`UIybe=fQ}KL=w1Il?;qI zN0OG#h5HkJkez9&;BxQ*`KXx1$18u~wpR4O^r~Xy(uPQSOekwNW(#*N+>1w#t*~cp z1C`m}L)R}!V!|K)M0R%~OniERgf9<_z(gu!#O z8zk?P3k3e>0aMpW5jn5#a6T~-M%XTZ>f7Gz5&bM8l55Gou8x7}(|7X&&6nZSY(uzI 
z8v$)=juRc@A<}o#gWp(~Nh*H!lllLBhoSZonDr@ud~lgTBulF;qaw;Ex8XCH|2PM{ zZQn7mL+S9Zct88EB?qe)nL^T=gI0I^Qoy9)FS9Z}5zBmeCGM?`CK2N5H9)jSYH{^)@6SBQf8|K;O5=m(pepibQdu)op%jmX+I~7j2 zcU3I6@|pu$9?!4;xqUNU5c>5_1u76?HHoZ$^PR3<=>lW+j3s~HJ|~xT-O;orkv-LY zl^n7>3_)|U;hm2TIao6XygM`D{@zIN{1pvb+*%-f*qZe_Da#nT+i*8CjfH-VIXh{@ zWv*XKsi9tbFQk1>1p5?eaGrkvevEPC# zBiWd_{*1m|n80NJ4lB(}*y2cYHmpa7ow+=P4{^|EccJw9<&3s3*%- zh5g22Gg-V9HlD!kptW?pjZG}h+8S+sv_!S=44j!p@(tmAnz zHc`iv9lK;KH0p}5*7wv&`9v$W&&QJWe$c~vOgCl+X3k;9y&l0H26J}m*3a-E>ojWY zm222NXD*v}?J3^9YR*Pqac3Q#%QhG{edI&O&Y?#W22o=BRrdYOGthdem0xHgaLv>v zvLDCIVO{P`VwDFE@Kg7SuxriL*-r8n^arP~wHi9?)p~ih?e9H^{O==#{in@}=5ONn zxT#q`UcQ*u$~Z}*{Uq3}&+kyr`6Ix8Fb9KEXR%c_{R%uZ2pk<16wKCWe&H+^KsGkjr3#I9U?R91kv!c;z}ZZ6N#^Xa0jOd zBzOZkW|F|h_H1V+lYi7~(;!*&q?G@w{RH;}Mw6m9Gs%PECnV?JQL99;PZ(hQlZ?C? zPfYYIP?Ar?)CrwLQxEKsQ6o>b+sW7al$#;G{!A)Af zisaR?V4TSE(HU~IH&B}i7&VGfRExqx5ytSmF_WZjW{6nBGp;1*GxD1jz>ybE$ghPj z>+GV{VA`B6;+huBZS(zyTkBTv0Xsi)C+Si+J>nYEIa`u{{!bR=%=+N;f6DaRju0%^ zT8n`;sgTy)W;tshnN%!oBpVAN$lk&C^$qXpAbnUKJXYC5>uLv>5g15}#t3)CEFL;q zMnSHDI&tz6u#Lf^`R>(IS?>kjv}f%~V&459s?{zOG6^3p1_!vnY zEzC&a9}lua*AcJxZo~%;bD_JXMCh$K(M3~d8`_rEe94^U6wrevQaO z|B68n)l@?x4>K5&bjF)PHX&!-82qnM0*!s<MILg z6-A(!ID$Vvw2jM5xzGHqm<4S9lcyZg|T?P6x#TLlWXH|PqT$^4{dG3L~jF(@$R@!{HA z7|ajQyoaM`(5=O^;K(F2-cv&7j8Mf3LXBS+dcex5Xg&_g*D=Y4-pJV7f?>xA+C89- zqpbi>G>*m-qyNw`>Ej`-I)hG9GZehG8R(MS#Fcm_*1ClClhN^E zgZ4gbiQgSMWl0{C^Ol%&MIG*I9jCWDTw#IL2=tYd$A>zLF+2Pl`1=&0xZ75mlI)4j zj3f6a>^8IA^CunR&moR3rMd3!=$4`$@@#e=RtvqIuNIToG_h8?$uJ2PwA?0Q@E4zC zzNHJkw!(UTJ+VG84R43F!mh7bAeq?+4f*M?*fb7e_vgUme^qd(@*YgAu7Z;~i+Iz5 z&EVW{6HLe5gf{jcdH=!$Oizki8*SbKUSd7acT|$x7=8`gKpzyY)I-d@Ag<9dlYMnB z0zL>F)^`TFa9Z^wT-+>-@_G}YU0c{ex1PwJ-7|uhK2rv&-FHCD<^ry_Z6>3OuEG|V zad1Oh70%9`2clI!_-^$b{KLa%KzXeku1QY^-Sgcr{q6zyx8f1_M@)ekvWcwG*H@r3 zB2(bW6~ml&O%UFbhNt5zh1Wgl;8xei}QF)DI_0m3d6C122uW zY|L?YJZ2;f*PiOaJ_kT5d&Hm3*qx%A*73{fT2t{zwT8$zft!DDl|S`}}{@ z-NwU{t^_t_S36-*5k{@HMF8%mUby9W4G=>y8rzep$DGJ 
z1dA9_@nAhX)iMr~`a`Lr;(dDL$4zeYm)X#ImSB>s3_fk~U9>e34(0lkF;ilJfp^uet^#lo_Eo%W!bqIKITc+R9Sq2a#{@r<2tN;frMvUAIFWOS@VayWro% z;Q2UA>FcK+&V7^)+@M9-9he;;$4#4I%sE9*#G+6K`h0%`B?J4pgWAdX{KEryoEHbz zH?HBj>IiS}W(t=7R%e5)xW3yS*CyEEy&Dto(ilk~ zk+;F+zs;yVeJ8DW@QhwOsE<+`X3?IJm*MrH0E7u&xtf@uQ5&SbA$cL4pt$WQuNHtv8?#U7j(#AHXU*j&hfe-5=lZ&Wl+iG4{uiv@1?lKnXFc><1n&iB6z5Uvb7!td6YfR~+j z@CRada1)ILMxNO;$dA`T-9@YM8ai@4%xl`JbB2z&$D)GCTFlGK#}yUcywYtooRa2_ zGMV~d5B~Jj1#{HS3C8U%x2g0YWo+{h?iUljFzM=xvH1D~ViA`{|C~PpSAk!aD&$&2qS&RvK6XgpN6chJSnui{%;8O7Qch|-CgHNOpXd;320mhWW{C&QNLax~ zK7Rcm*>&(VR~B^FYFk_!cVHP04^`s`sdR)zBlp3b`gM@`JJ0I05bU~eRv8W#)RC-A zb>8$+7a6)B!yZW44CX`US(P6pFbCH`Ok*~2?U+IA6>UL0r4LqTI1p{c`CONaGiWTb zr((}7n327+;IHRY*jv4v>ZEp);x+GlEhhJ05;vL=gIkN;`H4R&n1(K6&TijWrrv*cedz0SlJ;>j{JH8V@NQCwvL~_3 zT(Y`;Q&0y@yQG2>)|6w3oj0_oDZ-7ZJ_Ia2!n8Z}@cz~*l6bHXF8bEc-~AE|?|hPI z-Doj*;h+JB2IYih#dBoeniOyzJqm^;CxK1!FbQH6$-qlv=u~|vxSZOE&R7>F_LC#Y z417SBm}wL3{P7_5Splw(K1Qr}9%HZV67sVDdP&NXOw#dTKXm^!A!qz6$)4@feB!AN z;(OSGv=lw3XMzRC=#0lC|Lk`%KC+0S^F?~0RFr+c@lE~H$6fTOxgyy$+mf9B9!ng> zF4DF`soZQ?72;+Sz>e%tC;3zQs8OyqCt51QUVY($GfW%E+511~+=z18CHaVqtD`ti z;S|9}b$Xw*$1{=p$TZ1E%+iV+k`WmS<_f*slHa??mVG%^0r|G{LP;WhuXmP@^wEay z${S=`%QCpy_k$KG+QX05cVw~uMBmL&gf(P6oIP`r zY_PN>)ulpSS-zKierQZ?%ol;Y?49^LE`?;qj)p6Q(u4p>JTFoNyB_)y-^u^cuX?he zGx;EY#g};~(16ls--@{g&Q`&L8lOj;<(PZ01J20@L+C zlq_6+oH+K%LdS|6UR*?m7M6?z`FrPh<;^;?k^-AG%-q>)i54RXY1h7y zn;TYIz1P3SuAd}n?Rr-N`lBcEPRoag$CUdJyrhnZr*)I<+Ir;TiHWFGZb*;++fUht z<}m8d7vdi-+VJL388wDtI`yhDD6K0YH;y+D3CX``FR&tJcPEiuWy^5&qG@pKT{32V zp2ja-Uwdp3I_62YkqUrFKY4F1;4_4NE7FhIbL+DN|Y}}bfMrJFpJ8!4b z(Mu)K+-)*cJ!?lPRs_XOHQ>{&Cb-$Y2@lOs!VbCvTeh9!-5&2mnX&6=&e9mHt_a5* zmlq@{dNfKb_ra*u+1M`@L#5_h(Z13Vc=X$Qx^d4iEeU;v2BBlH;+-S^Hgb~Sra8yD zxDLTWR+?_i1j;suV@;tV-pCeu=jOF^mEsHfxNAKuZh1vyO~ugGwuat(dj$6Dnp!=) zt3+>}TaA@Y()8Pp1pI9?gm2cFvO~vK;RX3xkTVdY2B9_lZ_{Xe@^L1PJ)=q&ru={b zBNf`Bx|1GamvS{rd$>1sZmj&H7~JY`ol30@!<+->sL_)Z_$6qNCa65Yc|osXjMWKn z?oz~%yeqsEE7LHs_ZE!nKg#!x;^EF6J=lIc3;r!rz{aR_D36?vF7g}Dp!EyYwhZ6{ 
z#;dZ|R?kJ%*%3Ia#gWc8+JOP87TEo!0}}3fpx&iSj9Gb)3mN$nB!0%S;RmjxwBnU_ea`rWi_+ZLmJ;*G_mwc@28USUg)NMfGEG%$Ctb<5|tiDA^!bvHd{#b|^>LiiGCmFQXY7*Md9u^!N22>~S7q$8)(Xe^EBE4vl z41zZTbxxSD9yX$^_usMT8aRox55A&Y%4TN$+XnLD&n|kl;~+WJRf$DC%h|XZ1611B z##G2$1`olD!6K4@3+;68JY!sY@Ev{Z=h#p&bthjbTS-5Bok)HbZK8W@&r<&@-b8Hi z1rj97(VLgQGm0W*<=IgX-7~|_X0XFQG;Fc*ATyp zpJpvO`q|dSoAK*830ROjK%4yB1+G&U)BQ}?0fG{eH|G>}ws&aAR?i{}+VyDgcTKM3 zb1EEDDyIr_$8+~r?5Cr%3{ix2AQx)3~y+A`MB-JlW;iW#z3Y z#ea!*MJwSB^>XYO+!DSH3Pz@LJ?f$Kp4v>bldh!27OJ@G)+IhmPn6BMD6m@>tf0AR zY4rLY5%$N<6Wlhx5!C3h6nPg=!EXuwObgt*X=wT>X5IR4^xTF)dVBwFDkJU1%dkhd$P+M)5z$S>*RjMCN{p~JFVQ?zz2Sw!=AE=q9HNU_|3C5 zXz!s)-YNS%UGhMO(w4JiPjMY74lv*FLLWvA%;HCfQY74D0R9`E_=3-lzv@kt6fZVN2ox8u_tsZ*uW@( zSNP_Zkf{%gg(r$f;r<#+xO^}NCgm*S{YM!>RkI*lv;^O?y9dmCxfM3& zL_vB)lshl4on+#nb<2(jG}@(%k?S54_-p(KLA8Bg(J! z6=&x!vxVw|GqBt&861)n@zOCpP&dzlg??+Hv*Z_y-A}+;#-0f_j{s?B4d97AA1&<9 z@2Z;!hks2V>tq;c3r&OBmeuugg$%oQvkJ_O8Rpu`VseS=$!`#U`9q4y*ghD2To=~%SQ%hs9E9J{X*8dIu3Fog&N2~12uOV5NQbp!}sLW zSJTOKL4`Wqo8Zlq$LQhxkoQdfTqFEgGZRGx|H<>&52-=UC(gLfg1W~Gv!#c&)Oe9K zoNV!e)jRYUE#nwSexZqDUYsP$8-j?k=zRPl^i5s|ti%g}%kbFGcjO9xt3GS~Gfrmt zS}gM^rmIhWXVe#&5*=NFITMXh;YmkFC{UkB zo*sYJ3F|&hfZ)zd;`m)0bc7xFg#v@p>Dx&3vf991Z5CzP=LBP_%wMa(;Zrm^R|5A1 zxT3VvuKG8U2T;m!nbpt53i!LHoD7|apczezsjOEG`P^JXpUtqs&*!hwS-+FuVo^I4 zFAm_+Ci;;BU)<{>zQ2c=;T@djn9=ZhogAI_C=ouqldv-P(14VzY|ebcOj718gU9OC zVQgGZKtA0_4p4YX3Hv2jU7!IUrzyWl;J9b`7tv_nF|RMgE2vFuuaYx zN}UI-w&a?@3Y%gs>xu2Co<30?kBCjVsIk9iG*8^ zfUL=*!AJi$$#7E!zc7wDBlCxN+&E1BT0bQxXO9APn+}*|k5sp)68yBI!Trf;a&glO za$HV?y;yP>8lS#`JmGVhuha_JD~j;(;8S#o41s?o@yub#t<2vae@U08GSu#_z^7xg z$(FW_q-=?Oy?_67nCg*Fj>ew@!v;IB>K+9N@uKX5H?lBuS0?@OQUvGj_5{0U(V*e< zjZuvI4JjAx>ekr`z6hl(QoFUB`#UX+gbZ{OUtyp8&@?rg9hHuEx^!XogEgdN<1V`3 zzW`#^{Q|~$j|Rj0?c~GF*F-%+hfG{-M#>J*(F!sBMLL*U8`Fq=Ld z7G`9{MD?RQ{azJ9UP2@Z(GDei+7w*7cr$PG>oD1pyMS}C4yRXtG*H(bAcbqz(0fNs zVK`KTA6Xy^Ww%aod!}g6qaTpm(%%P1{PIz@0Y9M<<&UqF;ZH zhTRCGe)~d58sAT5TT8I&1J+>IJOf8v(8|tBg)@ z@GuOk 
zCR&C#^Q925WatZ8_2F_zp>6@2CWQ}B^#TL;L6bJAOsfUoQjKwl`MdXD7kEv1N4DDp%54?f|QfMDYqI zSCcj2Dwr_p0eRToCpd-_p!}OCi3`{Y+0L%ee?=b}i>qi!mn*nSvrP4sV7xtN4}`s( z3K8krAXU7EYdaFnwdRe2H@`Hkj#XbIXD*x}^c})iYX`nvArO}B(1snKBZ;$H8tt{P zff^eFh)FdE@x!{bAVwK1MHHyNQUj;Ad<_w^n24Zpo4IIYMU4F?3+!JHm^XbL$VVLm zm-EXoIo6%pCCvh-^BN$3!Vlkny@Hor3}L~?VziZ$0Zo5l?sU)t7}?$Q*u!*UvdtJg z!%x;v5>+I>hc>`&=RAyj$S`xPqe(^JCoXrv2FCGd4rA{hg_(N-!0zZ9@*tAJ&hPOs z`|3BK=lWZtxZwm7BuwFX*?3q|eE@C*1%v*m?=eT@ zqYX`{aY2%=DT{%~sX_*FVK($DoAS9G`E1qNO#a-0F;p_sl{PBK817f-~0OrM0V}ZlRntwg!OrIRcE}n@LE^7I;vO@T4z|*IyG2QMTsb%qv5O zO(q_<+KY2u>;~_`A(F9an1r3HwURvM$=+RD0F44uCw*BSIj=DlrffE3@4B9_I(1pd z|6Dhtu5%mfBO`MeF{4&e;@{6CzPm~s0`1YPb`r$SumZ^t6|%jNS-bu~#!J`WyoJTIf8L&`0W z5Q)ICP-7QkIk;C1uF9s9jTMQU&(~&%$=4(z+Pk{=O1wQpBLDTB;eEw)aORVyJlK9ICXr82Jylc7~64Pt0Nzfs8T$0&cXlG&)k z7pk>Ee&f&jM{Oz)ZXiXjhE|c({ljp;HVZATU*fLC#WDtWYU*=NjACZW-C|bIUQ+Dq z4OjjUt6jvMnNu?!tgGgtvL*+LlbxXbc3JZcCZPvCsZ*C6IPLf9xa6H5g}0n(O}nLLdzexg2gXd{S2D>R%X(g4F#xhcxAB1^^hgvfZSfO8`=~#6r_~mI zDLB)b#K#s{?z#MV-UZ~+z3Q&rGzAl zdF+aI^0Vq38MqS3NuF-yCW|P-)?^#Jswxk5QN2vT#ZgeXpny&IP>t!{xkyxyjG3T7 zR7`fWhKhGc+Dm!X{eV1vU$c+4a>)Y2%|z%c9A!ht9mHQMrNpvp4x_x}CUe47$Y#$^ zBMH_xBosFmA5+b$Y6$SF3c2aJIFr3vg$Z#(*8t(Z(b(JQ@=pCsE({% zS4!xvU&P|`Lt=HLpJ-h>LXt~Uh|zU7BDQ~!@D+xTAT4S7Cg-`C5C|nBmL}}%ecfc5 zR2C@>=_GeY1IW9J93NJ^f<*V7Cxt)mlH!HY#Bs}ddheAslBilij6+SK&F>IX^S*;5 z933H(tupkh@dbG2WD@zZ`~xvr#FD(+B2wok2E7Yf*}GpKl8HhYn*ZYnc{vyi?{!K@ zNLn)yb((aL2I2=D2{7Kev9sVL0Mf%&_LQ=5r1@U{v5LJ;nBJ?hjcnWCJ z6COtpUABiB|MMC)ZNEVhYBQm8-e1u4j3VXgnJBWP5{4%`$eqR;c(?ug+S;&c*Ia5=eIOd=OfW!0Odw^rrP7 z=gr& z*=={0WeeX^Q>UAW(;gK#*L5MyHU`VJ z8-vwsUz&e2AN+o421^6pp(kd|#Na=EnDJi`$Ao=cfZh)j4)ZY(hbm9}F$5BIe?PikK&N8mimyu4D zG)OAY<|76bl;)dl#98zvWW+2q?^HHoJ_NTQlaLGO@~9jW`jE?a|LaEILQSyKL@}^J zJL$Tev(e^ESMcdwz;)K=)3=8PsrOC4DVb+uOof;u`Q&g6ou2&>J>1Hm_X5{YwImO0 zMmZOy0oOAhx{8#4i6FJ<`IKGT98wQ+=(xLIaFVDFjBWO%v^myEf%O8G%?shJlp>~LcTnPwPIBmH7uo8hi3P3j-X$Qo>2))RzP}hA1(Io7A3ltdu}+NfaFUD(c}?5`gB%4vbOdD 
z-a8dAcoG3=zx_c|sgO+DGmfMWxq+LT3%R#01onxYVfO|cMVF=A*fNd0$@ysGvI$#1X2OhE?<~|@@6^mh>VLO>{-v=S>S7Y$mhcbs>EUtsTqTTnKWFnaKEFZwJ0GIj z4RetEzhT7pSA^E_*N1s_XCc1ZmYZWEknxd$@W!>odR_>tx_m$FY7bc5(Fy|A9pfE6 zF3YouTTSG?J)%Xu+)(3;TeNtKl?7X$MhA}?61R}~C`M5e=5#uOVW9+l^-(&!YK#Dv z2xT~n_Z~H%? zPux4!yq(o}N-dnRi7_WFA?NUM<|-7*t{^LRE+S`qrI;plPa=?i1$!^ zXnk7%S*PDr8~$ZG(J5O;23+gdz#mmS5Bu%JNbe{(uCBu0Cbp96;j2i=v4L8p!6nSx zCK_ihQ9^Srx8W!IbjZo2AMmgFN+eBMjzs%!Ahm~^nK=Cg$8^# z`WE861>4yjf|0~uPlA{>@@-a+A1d}Ea&oak^Qb5!?W z*@tPw;GG*Dl)j3m*2jZx%r&h2EdvcJzhz#YB z<}KgQWg9y@u)GjX1=zraw;^POnLg+RY#|e&No>)9I9BUe88kPzk~LMWNIKe<9Dm1o z304=9$hZ5*d1@vR;Y}w?K$0F>RX}3BB}iDnC~h)v!eW8K+_k93-i)%xTlhoB!>@+y z=<-%FZfyiz){|7=6JxS6@gkczB0~LGR!VC23z5?42brMYM|k-CMzT8X4cV7>7fm)R zkiZSkaE-Sz?@4A5T9{mh=BP#D&>aLV&6PkKIX}h$M**z!;WJ{mj)3*D8N@cklFrM_ zL+&I4yQ#m2?_sStyW=}`{#FMiYkk2yNSA@iOIOg}yKrKkv%g6$U|cXR=UEGRD0cG)S{}GZ1r!r?TJRbG7$Qy0VFbp*uc!2tcQyi@)>W$ijNfVJFU~^2V`pLt`lczvFvkr zlgJa4@@pIS9bQ3Sj8CGO^B-TiV8-heWlnv!b6)Vt?Iks{4KkjOrCb>3tjE_3j1(?VIf5y#esa>m0aO%>=(O zMd;sGNsc@`h}Qf{h0N1hoKsc_Odq+xvGcZ&^K>3OjmUJlO61Pl zAILZAB`R8Qk@Ks|!^=5Y;Gp9OX>Yhq+Oa4~8qBpgju{NZONP7?A#ncwgnGa+czN92m4JKC>z>Kw~FlyP2Ru|>L_K%)KMx`H3#%e&>YcD7(G^GU_#=x)q0r~l| z65`rK*f#F{PhrMlHZCk2b_ z0a7}n(6a3p$H4;jPC3UjvdjUgzO#@JS`B+j2f*N-HOsqt0>&NtK*-OXEPmGs4cLJ` z+It5~Tr9|^mIGv+gA;^Fd4tsMA^5et3Px6B(Pv*2!1BUAfWI2d_m9zFMQ4C@&MD9` zO((YxXQR6|-7rJC2fl552z}3nz&ouMQ0E)+!l?qplvGGlz!Ok3yH6b67r{yKrt7L4kM%+F4W&_U6xFyKfO0o@jx?UpZHNdpKN*_yiTpf5LFB9$xujKYcFv zK8p=&h=dmlvRgChMJ@+u^&jV0)wub*c{_HJ^>{V$pLd?U7%)ihEX*Rabp7CGR|>2P zdJJ1d{;~@;NwbI7zW}uC7;HVC2zj$Tq4@Y6ko5Gm=y%9~|AK?yL-lmtLX#>ot5J#9 zGt@$4T0g_X@?28F_@LVQac)oN9UM+oCfe6}7=@=bu&K2NP1Ih&88@bpjec9O1k&ct z?2Ad=b}*u$aF6@jvc}&GgR1Z z!ke&Kg&QbtNoM#I?y|}~Vi2-jfG8`h#od7$QG-PT_VEvblTW9i$Sn&|+gJs@Uity8 za=V2G^!o8BOHoFn?UlL3`A6vI7BwQ$?|~i-Hsj~kK+-F&GW%ro@U%k_$WT3++B#~0 z4)fNOFh?TbiYr%i8DuXgMy^*>0`XgX^;8Haa}rVMi-NE#2??hFG8&Nzv0V$ za>P1D4ZPL+ag?VIY;6_-SCd{`KySc7b}`Vb?7@8TbfPvpWmEW^1S=Hq5;@-;fIl2Z 
z?D4EZB&1M_H@rTJZS5|RYmHanX61Z5Z@m^t)zKyWheU{ z&l-H!gYC>Hnd1f!OQP_jt8Yn!Enzxk__6SeFZkr&Ur1tq5UTlFQf(<*#*$VisQzLM zFWgN)mfP=h%Qbqdoyt z;%s2q+)9>C@;BQuG6ZujF9Ao@GWyDu>qLFG4~)B6!_br&yw7xpr542~F2V|C2W!C> z&fTHfo=3ixJwkgf`Xa0R3+PV8GiH;mChWK@fNbTtzSLh2^m>y3jc2bw?v^TWVdWf{ z(RCi_+)@P33>9Sd$qgw-wNS-JW$>PqU1*VDG>$-r;PS{e_{nfQls_?W^i(9;l-PiF zr>ufq-!C9LtuOdyq6i$C^@`Lt_<@htG?;F1lyt>b(}#*T)2|FGz@3lt^JxD@W@nw) zIWc`O79~z1VzWhVaSmIp$;v0zh?wVKa>H#3%j5r@G!pjZ#mE^X~>exu%o{RB4sBb zxL8>Nl081)WYuooTpCVWCMBcQhDXS&;bt_;xQi_m-UrPKnotmJkBk%cQw3b!PECZt zrtEXnI74gZa zSx5t3UE*d#ZT#%vm<~qMWI3}PS-@54#o%{bmDXJ8zuP(8D0 zQX!1B&;X$x&!Zc6=#uTpk`Uw+fknINL$7M*cRjFO+JAtcz7GSkS3S7S0$*5#*hsY@nUi=L^D2uiR^@uA>NJ1(|ss19z zn(rXbA_;cW=b4}z{DajA<tN^gx%r;km%#y*o5 z@r89+_*vUp;#wDo8YCa@TF2qY^PbQ$d03bpNfH*PLhm<&TcxmxgAMP zRS@RhZ(N}mM+enf;HcU_>^ED69Q1TTo=BNeNQlP2mShtgzZ@sN{fBct7UQ2v_GqT) z1}u8>7xw-fhMWB@QObIGh~hX$m5Vf(7Dst_alMDR^}d`QZ@G;0>QAtGW)4X2!W@fP zruT?T=scVw(Tez;bMYryAAd>skIhcO*neRqS~as6iyzIvdVijwV>aS2w&ny%sjbDw z%39DH=WJv>@)gh6+lR&-+PV8VmeoGzfcl9(BsJYdp$(77-+mY5SapXgl)HlR(v^u& ztxrwRhfL;JoD|GEavg09Nnw3Ce)wk2Nz|`g!}QVbsqj0E=zPH(h|u{91p>h&WWpQX zW!0j093$OOQ;U`Pe3>LU@X=P&ZX)N%5o}epiiwDo*3VA*9VG0p}#Cy0vB@rjaI)J=RHnQJZYHn4m#NOk2n8BMwc03f&u>b?C;)G} zBu!#tuENyrFIf409!?nwK|-x2M6tY+V>{~6xr)bOp~WopMbjFD=Vs!PrgbQPOp-K9 zyrTA4?WWq#=HT(4^N7ZV8ob_c2B8KMDd#uN>{eEToR+jB_f_T-9qrY%pF72o`SV(E z7M#GzEj?II>^0Je48vcKV_2>e%JM1kk=tt5AxJ#~whl#+u-B!mX+tShI`RSBb)Vv= zQp?Dmm?4}xM~0T%b(gqJPei75f!OJy6S~^&PiemrCOyI{NPmhV*|o%x@Fir^uC}gZ zThwW^@xW?0X{1hS8>B!YYYe|h3&+WdL-^dlTH<1ANZAnWwY(X$uIPXsB0$Cwg0w|_?9x}z5GGktGM zGhh!dk%o)=L&?T6~_>9y2Vy{ z>yUlMLhJthb5A>GCPN# zlP?ncKvjG%RW_mzG5lQZ{^J!!Z*vGTvG;=GInS9LJU-mBY%$*Z@)H`{MnjU47+m-| z58}7ns#V+&$XqVl3AbXzkfFUk@O{1x%O9&?Y3YsZyRZM@y3_nHbS)owD&I6;9oK*s zWvP=*Oa>Lm$7NB|f1@$(9^2ilhT=mK(Q_v!+!qvW-fjFHskppF`&ODj>`EVWygnPO zr@upMf^V4VD=op>#zs-cEETf8>NzFS^QHFpwYfMv!UJ#HK9h03B!iL;i$LJ7&49Lb zFmoK2gT_!P^|WmP9HCMuZ`u+8^ zi~}O9L0eab7I)c>w(W_5EUr`1V(Jg-tr2LW3XQIE^IM40gu-*Spu&HnW|DKfi{%fa 
zR!WTX*Cdmseipf?hNFyiUPL)43VL1cV(}C9B=`3tG*?7}5a)D!=Uga}=)8>XP(PUg zmn!rt+=V$@ZNfGLkXibr19{c4L-(?K0bFX+m4LbB@bS zc{HkehVq-$jWhU;6YT?;=%o+G#n2XFYgbd3PN%jkyK2&i*BsdT7mD ztB9V_=;6Xdv2cb9GQwAWAN+8h0I8?I6(>bk^B8dq4L9OOxrw_8H56cqaOlt)&;VtsgSr+_ab0Kk279bZ|59+r`>#xpv^?tb z6(ReVn38enIAr}P1^4wIL-&qfM3+Y1BXNz(Fvq8uIyhd0KJaU>KRG{L%!$oBmy}k_ za^2##eJ{YzfV*Pa-SCAg?zlN>1KMxy3et~WGE%G_u~9fjUT^z`HXOf%x`h`L+vKC5 z?`(nOyCsQar6Zaw(k4;+#F;do8|2=+I5x5B2cDZ31)GKo%>^DOvkiSxBzt)Y6=Pq? z@JpAmOE_2J2TvC7j^BdABaJC_o*pdul!VXqm7;*jF07CxP0n=QL6?1-(TAlbilSDbIJKuY_(;T`5|{@I&bd%+R@Y;;A6N^Z;TtB@%9nY{>v31j%hOD8f_>b zz7G!l7Y3qfg79Wy2TUkLf#n@9_OM$Q+Q1%x8He~$KezwXWE2dR*2743)lBsLh#Kcl zEn#<@UIx~u(&>tW(NJHo7t4mPqyE~QL@B*nD4SFFDUZ0-u&gGZ?Wl@|j~vr6>SQ^& zGP8)=sW=WNbWg(eee03b%2H(h?;)DW8$rRZEn$DsK_pwN$DZp6C13S4;ecy0RtR{A z-ag49FH4s|(Vblozh*52h6}Q4UACZ~u>zlM)Q5wA&!d0EPl?V*D5xxmhBaD7u=eI6 zwC>MJuv&48_#15F_WEDJupH;j)jJ4~q{WVjN+X5CL#;9p#3^3x=M0MB( zNz6S*jR-s9+*N(#R%tAjdiIQ|D-(gNJ#96E(luo1!#&6}`U+b5M*?8yW$N$fCd?<; zPFhQkq2%06@S9`BD!w?1HjOE9^ZAb`!v8KEP~(cSqtlS2R=@eza7`|wziXCT2CU-o z&3IXG0tyK~12w73>GRSV=(eT;tZSD=8mgCS?b797-{W&g*Y6g7Rlfu+T@b}isqwQv zVk0pQ9Y#yNC|LOBAGjK*quN`am_)$|^a(A3w(E;A4|1tP%csNP?baaAb0zBjYf;tE zM%3RiV1904GUc#Yzq-d}JNG=~+|xhi(lxdO<*f>5W@8VM?zG)HtPK%Fz-tvli zkhGgVGqWDg8sPTT{~bb$NIU#J=?J2MsSqr3p5F6^2X>m3a7DWS0w1|TO?p0U@!1<>2bnfNEY(Z@0#sET27S=7RmGUEnmaiu9uK;dn+`n zKLRV@7<21bh*9G_hCXsU^ZX8&6R;HAd7AM4r3G=h#^RXdXzq-A6nd)cxt!;2M#evA{t%S=P=d(3;R-$E`2R`nH9PJw11SDb?tD$m~ zE$=u+{Z=xi&wMN-!^4SCy!9p1#(9)SWWvFy@fJLt8;R{7@X^Z#+~L!|VDRRpg2DAy zuom5b&)P>xiBuizm5Kn7*#$&eb_xvMhtNw44?uBEIqbc1k2R5xN9w-uaAkG}JWD=8 zKR|nN(fS%N$x1cP@A*unTBt$C%T4IKz$G@Ypn%bmdy4+dS7b~c)iIU_e}W$#Mv1|~ z5K(={{8#@>oX|#Nx1+Hj_9Gn)Yn|cdPan8Fp;F{77|X{0RDsvBdr?097fnz~=wpf; zxU0?P9CiDt8(bD@_;3ilA2dM?Y6r=kM}DBHVFjt@pCbj44>)t%V=6aS3?8KO!Fc34 zXpOXmEky?@J*&V%Wv1!=K{D&nno6*L8BvE7w(nXTa&tdv+iX1-2< z)HY|>{*wi{lgcc_0A@-RcYxJy*oA(*B&S4lP zX90}I4p3K4>2i!*8mjkg25XU3a6Ho+F2yL3sVE=zhHofp`lAD9792oCwF{{li6L5y 
zg844{;M4CGYQvo)^r;nU>`aX_@Wf9Xg_R1y>nq}9NxB~#5BZ3Kmqx&*Q#nL?(jR2b zX5)2ct{`zclw&eJM;Ql}gUqlyH2&9uW_s(w^Y@G4N^B2WAWpMsGh(?+q7IKca@|2F;14mULV|Os@0-6 zj}Gmxr|5Z3Fy5dBRsYgp-Q!{OshG>!W1pe?A$u?uN<;BIA8`B25YW8l4RUT<3Dvs? zZ{&7%)=a2!{0CJym}&?Ywo8EFPbE59d?q6Px+G-#40zFB2Iey}(Apg?aK<7NW%otl zkNeuthmF4}S!M>*ACH3m+tI{rb|DN$uHp6uQX$oOKcL^8$hS)z-btmiZ)eGqht)^O zuYoW)e0?u;Hu|txqc6!tT}L`4IG9vy7GynTZ^A-jj>AVq;%PFq=v7e+_>PMMofUyU ztr#IZj;$oYF~vUguL7n1WN0uv0>ba#01;RXP1BuW^;A50#51Cavl~kJX+sV)r@+eZ zL!fS-io#at((?CB;L=hp+HL_tZx87~+|fm}_vkM8{%-}hcl#MFQ&WvstxyvA8&Vn_Y1K$%@LhSSy z@YIVTR24=8yTg%h&^46pybzRMufvGT%LH|L(SWWi7)m`t-Oe%a^kD&G`CuVQHw%R@ z$C>7NR|Mc96G>vUj*!vS{B-YIB`l;Xgqoh;AUXeZiQJ_O;_Kke#3jT+jc^{UKOzfB z@js9z_ggJn&*i9Qr(slz`wS+z@KJ6(q}X=BzX?0o`*AVwrPD;PDFBjrCg5LW4bIMa zFhfF{gzsyp?GrwPW54}F|1+@ z;nms#l)rokuAQAkM#uV@eP2$JXqRPVp;sW0RC|gS78IcY!QHr_Lz;+rPrx$u=XgTB z6QldaWQogGwm@z=?Rl~uUyIQt4K_pgvsn;9Sc2_)znsWRFCkTivT(cT57{Jfig+q< zojc(z%)2#1%#8i}Z~@hV|K{>)5AGGk^M0LR(_crzt*iw6b=MNq+9*l7b5&XUb)h(5 z6+byT7*4hoegucP3HU;(6SICzBTn$8uys!c?D%C@Yjkiqx$NGBrBlOP}8D=QpuqJaf_Lq~+ryB5IYx~JGXzLFg^;o+l7KKRf1F7^R01`?NPz>ClZ zII}jCHkM6>{V%UTSVunC;%ks{TNUWt6@Wj~g55C(=-9jn)Wx2`_cPx?@z{A-+CK+G zx);#lZ82O<>IrVAd9Z#gk-kC&Gd+i&L6FG>XcblgW$7@W>nFkf=Q+^x;IcLAncS2i zljL&ur%><;2>%xf-}lJil#W{=IKP*18D3z7Qd0HjhDg@HY5}CDPYNEMc*^2att15RvB(+>Q{m zO_aik@OWx_OA?6{l%_S7T7$;KT_)7Zf7RRXgMu; z&6suEa0n8rwQ1hsdidCsMbzXOG-B!v*%sH}bmS(M@0woqS(8m57o0;3UvM43fM2yw z8ZFVHT4VZ7YBX{>c?x$4{y>I)3vturUg92}g(A{EQ5Req@d5w0sC0t~s83&tpDepa zLB)Giwpg9E7t6%<2cCiF>}habc#5npoMQBxa~X$Mf{^)25Hb2!DHDq)B-MfQm-0v9 zI^#2Bljt+ZJ+Fb*TWX=d*8(VAjf2?edJ|4fqt|;8 z-G38L8ThoBpPR}@;#Xzh&6og$ehI^KCl909($|sXz6v~S?TIHGuaXL%672eFHCiGz zigjYI)SjETit}cM!=v^VMD5wb{nvX!RB#M))u@Ox_Rqp1bPw~2>mIb`gu>}r7Qojc z4PKn{)yG~Q*?R@n*tI1vlBOj%bJaL((GN%2MzXN}+I4()$9(4bD43`7JEGUNKiNxX zhp8m%Pnavc(oMRy{3oIU0+psrWRyDl@udt3;*^pNVHFH@aRAnYwo$9oi}9{T zWm1TG31#66uJhr31Qz2x@(IDcn;wU6@T-NOu<`b&lgoqYT;a`pvQO(z;tHxfT zncR+;TF7itvtcdKpC(Lq`@g{^rWDz4e4OO#htjDt>ahE89mh}1!`m#oapIcUyk2u3 
zqJPVfWa`F{(vH2P?b=R~CmjS2*JqQ32j0QdGD$RQhDgrVOxBJPfrA6WJllI+gwL}Z zJS_KN+vF&iAK*xCbo~W~KLhxiV;He5Od+cZzrkgV4dm|rG=l8X$hD~u;!kN1)5FoE zQn?(867+R$iU?mm!)J_vD@1h81 zJNgisE{hA^XA!xDVT?z>U-V)nAI)D@h=X$y>3_ZZu=<;s$b2D<#(h#zy%FbBYq&sN z*kM4i55rh#trJK*vIWo5=_27alPIBUDOy@1Yvw)1%?VasN4Mu+Ks~4LV~t)O`k~xS zeJ;v@M;n*&l+N78YSjbSZ01(v;WtVyfFK+%RD_@DznN2ss;p_M1Z;cyiV?0h#XPeZ zWIt5Ns=Qr>1PxA7GA&-zH?3T1k$Nc;K<^^6x0$kb(sryazddtQC>%L$FrlLtNI`>- z1);r^sKCF%SnNzHD?ZN=ok5SlVZIjO8&QDfRW2yVM+r(>&Jn+hQY@d08c1(?ghR%R zu%gnH+RrJw(NiyT;-s(=A?HM7e4n7K096)F=hF)kIgi_f5`59_WgocP;hep}JZ15x zSoNg>@QryusD1VJ7xa{?R*u1bN5L>{<$;kf8IPs zeuEocuze$n%Kb#y;>Y;yqIPtkaue>#5`*H1G6|#{ahZe`}Gj_Ye_(&qd5N7Cql+Qj*}CYJV8J4J~q$_ zM%ERJiJ6cw@4f#zy1XfZ_F(eK-Bk-wi9E7S*AI>u#vsKi~XJog|ca&qk(C z<>*D7HK@|HA33$n2kXRxsLtX#CyiP}S9?d83k+4@y3h8=+4wo^T=R#$JkN^#5~qq@ zUdmv1Ik%vQ+m7t>ydu0jdMl~*io}Db60o{_36a(iB@>qO$#yR*^1!N|ZMAYBj@_0_ zzw0hMw|oH!+;)*3ealB44mRS7VhU$$S0Eb42F;_qqz!f-mv2>hR53E^Tus(B6|+CKv=b-aRixBx7fWSdfJ&)7JcE$$Nc!bmj`y+y1O|b8 zD|m|q<}V-@od1&K-i`Fidk!RUbC8*CWi%yfB2H9#w5cyC@1XI86gju+7$dsefE8-G zghmeQ(y5N8NLG#~i6X1%mTgxl6feo<^(vAJ@4Z>&>N~^SklxKPT|~*2ARd`)yRD zHw!8C&?w!v40$bFM-(jOV1fNF5aTnZH60J(3ySx!!}HZh<(LV!@{pl;=0(hxioIN3 zAj9rj6$+gprtm;E5!EjqMxDYj)YKITMkU)AE&Dc#)$g_ARKG0p$n_v7TBl$MFFUeW ziaW}GGDNmTUtc%J9=25`b=EB@NK(16pnpI!50D{ z!8H^7HP=I+)*WP~GM(#{a6XCAZ~`9F0KO-)TNSo|`L0iBFe(r(&+-NRJ`ae!Do&fI zOCqOWUuah@MIY}^BaPdSLz8kSs1D~a2Znm_n@8p7PCzhvED;Eov{S%#PBf~hpH3&q zd_lH*B4F-F1gw>qM)KPmP{kC3mb@|qGq))4X!YXw(|ggzItuL9ow4kA+bZ_I+x?_dVlNu~DMm+}m_`n+GG&JplR#E23StWzQFmGv z$0*-T*Rk@<;>>xqRfFz~>(48!w!S4v1zi-O$IT%0+8CE4U=O~X!l(CDG1f-Y$@OLf zGWREUO)y0)t0q7c<$Lfe*Q3O=@YHA-`(E9d1v zidY0oyr+;#(z)86*;821Opuj#<@Sgdc4Cp~W4Pq;2tK*il>J_vO~w!DlY`w4u;v>9 zQhz~@gs3*-KPmG&$@s?y3$ml> z0UG2FMs*&dVDv%`{oYlHiVU2Xj>nlO|CK4;;Fm)Be_lh>zaYjpyA(}pHniA%UJHSO z5Av*9jTO9NnA^2~DSLtSSYs#%506+^izLh>$=ifUS!X4hx+DZT^XI|N^P!jyv4(q5 zADJD~k0M7wZJ03XVUv!E@B~+tQ1A9z!Q$@}?*DrUg+=G08|CwGudp?oUtq&hXJx?V z$s}47pTIDDi{aje+pLY(J_vi$i54_uBdyGL=1k#yylvJeD$w;LX?k%9zMq^zJ*`)% 
zdWj3w@0IcMIVr*?IG&ooy&-$?=q=6pQzts&Cr=qgfDK5VtdXz^TJjFJejS8 zeQlhv{#_$_L~#wMJl|N`VJv~OE=Djed5Y-NiL1Ey{xcN(NS#O~oMwHsT=B$GO|oms z8m&5>&SkIu^?04buIahUnY;^_7iE)_|qPx`}L#3OO;gHxE}K@?+-b_ zpNJlmar@pPSunyA!oNP2Vx`AhYGYU2LS}DhviVdG?o0ZN#kr^CjRP`p%IXaYm6=Vw zkK6-yVw&;gg^g&Rq#q!Wghz!v(d`? z$-cwYghO;&FK>Kc)xmtM6mme)mA; z_fB%THXjBaWa7rRwd`;J$FeEZp`|qxQQQFh_JI=lzTV(d}F>|GFIQlN%1J^4GxDpPZvN z_XX6p-UYjCu7BQ7KL z!*vR$q5Ve$bY2XEfJ6OfbW*JuPAJvD zDeb$=r6GS>pPT*sRJ9QQgzr}p$&a2%BJ;kJ{O8|F&-t7Ow_0k*q46^$sXd8=-SHzihfdJbIxdir zM-Pa`+Gs|9^f82pJ>d4iLdo0OW03x<49z>8NKRk#<92XX6O*oIWQR>JVG6Fp3XOZD zO2P(oWXp+n!)-{MSxLGjHj|R`7s=Tv6;`rs8p%D6a5xzVze4h&%V9wq%PyM$!-~^Gm0v zPuwJHxkuPv%PzP~tFp7?2FQbAD_&{qN%B$ZIBV!LguGHT={pO(Ny-g1R=YU?LFz6- zk8VKSDn{5w)s(F5osBntS;7{qxrycWI@RVx{KTbOQn9p94VnB;jhs+>g~jySD5Vd2 zr1YTynciPTpXjK=JOTDGT*eLeSvdGp95o;yp9c+Bu* zCT-)W^lh#9ow6i3`GKF$9<=b}Bd?QD@_AwP4?cOc;C|iyiOHL4%hiiBww_ z9T6SGPTWcYn@efn6`v0LFSJR+@jdY5!Fk+SA4t4LIEIQxA#2|72~F(UNen(u;4(u| z(!AS|yw$KGKHqk+#SPPNvxy;-rE!Gq^`B&6dMh3)KTqB*e#zX`4`yHN5+!=xoTtj@ z7WG-|0+C6H#HSKh;`@9qsL62-sSPa0=Vjw?--=LFk`scv&L$J>uv(nk@&VWUu7kwa zVWi@A4vCj7AU+%us(NuMd9T1?*sw%I5D-lx*svF{fr#AhJFK7rMvdL5p#CH=Vl1GN zU-&&BCKv(p$M2tew{K==cILY~pI3I~#c0yIN%>^u@t;Uo(M59Oxfk6yH=2svZj+I* zOXznm#5AGi1O$bAPla`TX^hu0S}-q(f7hf<-y|8)kcV6N+!O)z5zVHyO9H9)=|b|P zmjz!RygZfO6g@_h0*2Ay)lKMexG&9b z^rXeVI?@H&Db&CJM&f@UgQ}-{Qqibf8uCsBPx>kcp8m0xMwG3G>AknpT%$vLO@F?S zg7;XsIZcI|Tt6Hf_`Hur%_~u8Rv|>V4+A)nL#s{=r1k?m>HbwZU@L4wSykVY1Fz4K zoPIV`5&4V^OK_v=YbFD7GN&^qvecf>nNa>*OYheng>))|ZZ2gcp+1t1uT_Or->Tv- zssicTeU9V+pNwr$B&T_MD(G#K0bEV5WGT7A5)A9x7G5lvA#(hJ(EhK;;h? 
zsK&b*^h)jmCdWgePn0M z$xxm>2`Vp4gM>JDv`wAg)Az$vV)GP(tjv-#*}WMtYR&MZN)PN^|DXbh+7S-F$Qfk4yb?Leb>ZImP?(XjnR_POk0$O>rCH71bZPu0 z6wol1)12oIp85L9RJBo9t)!ZiP4woroEV7BJdC&}&-srR@cUr)I!uKzkqk;sGs78w z*un+*A}Btv3~Vlqg~#8dit9(|DpID}qcszPK$MzPs=VZlc9Ts9uw z=v2{HX#q%M$3l)N&K9q){fe6=)TVl)zZ|U8Dx`i@s`Sf2c_=zGi}YO>LcY$ZP&gQ@ zM&@N>kXq{Y(ntOITpY`-sD6PNowzg;>7=u?*j|U0N|Rx?p*nqO@Emok)9ywf&UUB=$*!pA$Qw<=q_FO@SfAJz}&wGj8mPC@7 zH}}z}mVvOrYAu@HGn{6*EJUO48F6WIOrfv*3aOlX8?>xf5;buH(e{`}@4;M{Gk_8M z7uA#dI*I7g$xOv4elJ^8<~)cFa~EIRTSuDa`;+Encf<|dYtZ6k7Wcdpsqmh8mpD0P z^0_)1oIs?@EgOAZ@nSvy48n2(=hXW#8vUvt9o(=R^|<*tm-R3fp-YaW+s_Zl@~zjz z4?SihmklgfG})3O{+lCfH}G<7`1(qA2y(Z3(+%P*Gqsze)eHZ-3Gy+Pn}>L5|g zI8B{r96^(vzJq-^KcK!#|y?zPxrdma6pn`X?{Mp1-$6y*PnALTo2CgoEgNNoI~&M@1y3<=uUe__>-QmW4M}e zqey;k5ejn1Ahz9DOoLe*G1L-gx+B`N(g3?I6XN+L##qh>@w!Z7Sdwjz1-A@w+I@3e zu}g?6hFIe5=IPYxyb1Q3Xn{r1hTQsXrdar@iBvbT_)f!GJZ4rJXL;8cTL&57b$O=v z;uK@NXU2DQ+f*U$f6x@;7nz8sKA!*DYUD`f!B_zUq9Eoc1xp2SUxU+6)tXX0|I8-Lk-59=eK~)qGu_ zTr<3?cL;gqV2yt|WQfIfhB#<}HP$_9jw`P;kwN3ssonX-)Nr;I#lB|P*l#mkU9c5y zo#+qDNg=Gfv6E(n9TA_Ywd6kkK8I``mr9orYaDo&#VdXj;>$~lQQeMQ`i{@0(#NJ$A&%c6a12;j|vMNX&vRdijdlicRn+ywSIP9Fj*ZSL? 
zz!^U%2BRChz}>zaB7%#+_|`#?=AGm&85F@Gqi}E?8ldDK`NQ@I6QwR6|F14lEh~6C zf=h@hfee#9ppe9G^yHzmcq733w-_g<^H6eB1 zJ$NL|gt2i4pE-UmzQx%wpV2)oE#m7Irrm5*R&TriMcI?5FO zyIeWm$4iUzr-9zlXt3NIsI+yILdm>T5UK*5=W~N*|z)cAfBV}%uI#=_%lhQ(4U72_HIcK}6nQp263Vf0a!H+zE89r{x z$SQT^Ykprb^EGRmDkjB-a1qMY@SoFz?^$igMFV;Eh5fMsMcQSlP_+(f=#G{Xoq+3q|;yfl^( z2pCo)B1R(PHEVUQS^GoHI-P0OZLhgSlU_Sb`hRIM=v))~p(ex5G#Rzmq{65$mP|L^ zE>$=Fz_6a&K81m0R~T4*j6tXKTT5Ww9)o{3*~8e0ANfhQ)lcE>U19K^*p;_J z*^rN9x1r0+?i7VKQde@@P~g`^%PQg5g?lS1?c1#=*tA2z_ERX>b%jFDk5TA!?Q03_ z+oSOB0zu0xQf;$Gt;L>}NYG}bL)+QU{yy7Ad;janXbO6<-Q)>!Ni6@@k?j_lFgIS# zhgqlgAxKBy+&%Y@H}51%z(k{Hzztowc_#kIYZAa>M8__mG2 z-1D!AzMzn|Jf3W)|^Z)<= diff --git a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index deleted file mode 100644 index c20d8afabf383430e807fde58270f4ff7c92fdc5..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 398 zcmbQn@*$OhfsKPviiv}fK}I0n*~8V@JHX%5FT~Nw$2H#H$;;I_B;Gy9F~Ea^U5G(p z%3@ZHEBvOMEJwl_O!%`?ixP|D6ALo+b5l!-GLwt-4fK=qOY=(f^&CS&f;^oGtXXkj%{$(r+6K}d<; zP(QgZje%hTs|n91pMdA{fb1StqZ*47qNWTcA~}hbsYUVSnI#$V<*AwJ870NK2Kq^v ziN$4W7!p{7m^e}t9?W6Yn80lMt3U99sG=CqrFp4UPZ%ONg_t?6F@jWR09BZFzuEp# zSW(z;RuKaSqe27g9*HtPHz83)iM~Pxph%7cP$i?nhxx2)b@qRrx`qJ=7(uw8ao InputsDir = unittest::getInputFileDirectory(TestMainArgv0); - llvm::sys::path::append(InputsDir, "ir2native_x86_64_model"); - return std::string(InputsDir); -} - -// Test observable behavior when no model is provided. -TEST(TFUtilsTest, NoModel) { - TFModelEvaluator Evaluator("", {}, {}); - EXPECT_FALSE(Evaluator.isValid()); -} - -// Test we can correctly load a savedmodel and evaluate it. -TEST(TFUtilsTest, LoadAndExecuteTest) { - // We use the ir2native model for test. 
We know it has one feature of - // dimension (1, 214) - std::vector InputNames{"serving_default_input_1"}; - std::vector OutputName{"StatefulPartitionedCall"}; - const static int64_t KnownSize = 214; - - TFModelEvaluator Evaluator(getModelPath(), InputNames, OutputName); - static const std::vector Dim{1, KnownSize}; - - EXPECT_TRUE(Evaluator.isValid()); - Evaluator.initInput(0, TF_INT32, Dim); - - int32_t *V = static_cast(TF_TensorData(Evaluator.getInput()[0])); - // Fill it up with 1's, we know the output. - for (auto I = 0; I < KnownSize; ++I) { - V[I] = 1; - } - { - auto ER = Evaluator.evaluate(); - EXPECT_TRUE(ER.hasValue()); - float Ret = *ER->getTensorValue(0); - EXPECT_EQ(static_cast(Ret), 80); - } - // The input vector should be unchanged - for (auto I = 0; I < KnownSize; ++I) { - EXPECT_EQ(V[I], 1); - } - // Zero-out the unused position '0' of the instruction histogram, which is - // after the first 9 calculated values. Should the the same result. - V[9] = 0; - { - auto ER = Evaluator.evaluate(); - EXPECT_TRUE(ER.hasValue()); - float Ret = *ER->getTensorValue(0); - EXPECT_EQ(static_cast(Ret), 80); - } -} - -// Test incorrect input setup -TEST(TFUtilsTest, EvalError) { - // We use the ir2native model for test. We know it has one feature of - // dimension (1, 214) - std::vector InputNames{"serving_default_input_1"}; - std::vector OutputName{"StatefulPartitionedCall"}; - const static int64_t KnownSize = 213; - - TFModelEvaluator Evaluator(getModelPath(), InputNames, OutputName); - static const std::vector Dim{1, KnownSize}; - - EXPECT_TRUE(Evaluator.isValid()); - Evaluator.initInput(0, TF_INT32, Dim); - - int32_t *V = static_cast(TF_TensorData(Evaluator.getInput()[0])); - // Fill it up with 1's, we know the output. 
- for (auto I = 0; I < KnownSize; ++I) { - V[I] = 1; - } - auto ER = Evaluator.evaluate(); - EXPECT_FALSE(ER.hasValue()); - EXPECT_FALSE(Evaluator.isValid()); -} From 4ba45a778a13eab1495a75a14682f874016f3d21 Mon Sep 17 00:00:00 2001 From: Lei Zhang Date: Sun, 12 Jul 2020 13:03:23 -0400 Subject: [PATCH 148/771] [mlir][StandardToSPIRV] Fix conversion for signed remainder Per the Vulkan's SPIR-V environment spec, "for the OpSRem and OpSMod instructions, if either operand is negative the result is undefined." So we cannot directly use spv.SRem/spv.SMod if either operand can be negative. Emulate it via spv.UMod. Because the emulation uses spv.SNegate, this commit also defines spv.SNegate. Differential Revision: https://reviews.llvm.org/D83679 --- .../mlir/Dialect/SPIRV/SPIRVArithmeticOps.td | 25 ++++++ mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td | 72 ++++++++-------- .../mlir/Dialect/SPIRV/SPIRVLogicalOps.td | 6 ++ .../ConvertStandardToSPIRV.cpp | 85 +++++++++++++++++-- mlir/lib/Dialect/SPIRV/SPIRVOps.cpp | 12 +++ .../StandardToSPIRV/std-ops-to-spirv.mlir | 42 ++++++--- .../SPIRV/Serialization/arithmetic-ops.mlir | 5 ++ mlir/test/Dialect/SPIRV/arithmetic-ops.mlir | 11 +++ 8 files changed, 204 insertions(+), 54 deletions(-) diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVArithmeticOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVArithmeticOps.td index 350e3659a28d7..5a12e6f36ec48 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVArithmeticOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVArithmeticOps.td @@ -452,6 +452,31 @@ def SPV_SModOp : SPV_ArithmeticBinaryOp<"SMod", SPV_Integer, []> { // ----- +def SPV_SNegateOp : SPV_ArithmeticUnaryOp<"SNegate", SPV_Integer, []> { + let summary = "Signed-integer subtract of Operand from zero."; + + let description = [{ + Result Type must be a scalar or vector of integer type. + + Operand’s type must be a scalar or vector of integer type. It must + have the same number of components as Result Type. 
The component width + must equal the component width in Result Type. + + Results are computed per component. + + + + #### Example: + + ```mlir + %1 = spv.SNegate %0 : i32 + %3 = spv.SNegate %2 : vector<4xi32> + ``` + }]; +} + +// ----- + def SPV_SRemOp : SPV_ArithmeticBinaryOp<"SRem", SPV_Integer, []> { let summary = [{ Signed remainder operation for the remainder whose sign matches the sign diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td index f114c878569d6..cbff82efdfd3f 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVBase.td @@ -3150,6 +3150,7 @@ def SPV_OC_OpUConvert : I32EnumAttrCase<"OpUConvert", 113>; def SPV_OC_OpSConvert : I32EnumAttrCase<"OpSConvert", 114>; def SPV_OC_OpFConvert : I32EnumAttrCase<"OpFConvert", 115>; def SPV_OC_OpBitcast : I32EnumAttrCase<"OpBitcast", 124>; +def SPV_OC_OpSNegate : I32EnumAttrCase<"OpSNegate", 126>; def SPV_OC_OpFNegate : I32EnumAttrCase<"OpFNegate", 127>; def SPV_OC_OpIAdd : I32EnumAttrCase<"OpIAdd", 128>; def SPV_OC_OpFAdd : I32EnumAttrCase<"OpFAdd", 129>; @@ -3271,41 +3272,42 @@ def SPV_OpcodeAttr : SPV_OC_OpCompositeInsert, SPV_OC_OpTranspose, SPV_OC_OpConvertFToU, SPV_OC_OpConvertFToS, SPV_OC_OpConvertSToF, SPV_OC_OpConvertUToF, SPV_OC_OpUConvert, SPV_OC_OpSConvert, SPV_OC_OpFConvert, SPV_OC_OpBitcast, - SPV_OC_OpFNegate, SPV_OC_OpIAdd, SPV_OC_OpFAdd, SPV_OC_OpISub, SPV_OC_OpFSub, - SPV_OC_OpIMul, SPV_OC_OpFMul, SPV_OC_OpUDiv, SPV_OC_OpSDiv, SPV_OC_OpFDiv, - SPV_OC_OpUMod, SPV_OC_OpSRem, SPV_OC_OpSMod, SPV_OC_OpFRem, SPV_OC_OpFMod, - SPV_OC_OpMatrixTimesScalar, SPV_OC_OpMatrixTimesMatrix, SPV_OC_OpLogicalEqual, - SPV_OC_OpLogicalNotEqual, SPV_OC_OpLogicalOr, SPV_OC_OpLogicalAnd, - SPV_OC_OpLogicalNot, SPV_OC_OpSelect, SPV_OC_OpIEqual, SPV_OC_OpINotEqual, - SPV_OC_OpUGreaterThan, SPV_OC_OpSGreaterThan, SPV_OC_OpUGreaterThanEqual, - SPV_OC_OpSGreaterThanEqual, SPV_OC_OpULessThan, SPV_OC_OpSLessThan, - 
SPV_OC_OpULessThanEqual, SPV_OC_OpSLessThanEqual, SPV_OC_OpFOrdEqual, - SPV_OC_OpFUnordEqual, SPV_OC_OpFOrdNotEqual, SPV_OC_OpFUnordNotEqual, - SPV_OC_OpFOrdLessThan, SPV_OC_OpFUnordLessThan, SPV_OC_OpFOrdGreaterThan, - SPV_OC_OpFUnordGreaterThan, SPV_OC_OpFOrdLessThanEqual, - SPV_OC_OpFUnordLessThanEqual, SPV_OC_OpFOrdGreaterThanEqual, - SPV_OC_OpFUnordGreaterThanEqual, SPV_OC_OpShiftRightLogical, - SPV_OC_OpShiftRightArithmetic, SPV_OC_OpShiftLeftLogical, SPV_OC_OpBitwiseOr, - SPV_OC_OpBitwiseXor, SPV_OC_OpBitwiseAnd, SPV_OC_OpNot, - SPV_OC_OpBitFieldInsert, SPV_OC_OpBitFieldSExtract, SPV_OC_OpBitFieldUExtract, - SPV_OC_OpBitReverse, SPV_OC_OpBitCount, SPV_OC_OpControlBarrier, - SPV_OC_OpMemoryBarrier, SPV_OC_OpAtomicCompareExchangeWeak, - SPV_OC_OpAtomicIIncrement, SPV_OC_OpAtomicIDecrement, SPV_OC_OpAtomicIAdd, - SPV_OC_OpAtomicISub, SPV_OC_OpAtomicSMin, SPV_OC_OpAtomicUMin, - SPV_OC_OpAtomicSMax, SPV_OC_OpAtomicUMax, SPV_OC_OpAtomicAnd, - SPV_OC_OpAtomicOr, SPV_OC_OpAtomicXor, SPV_OC_OpPhi, SPV_OC_OpLoopMerge, - SPV_OC_OpSelectionMerge, SPV_OC_OpLabel, SPV_OC_OpBranch, - SPV_OC_OpBranchConditional, SPV_OC_OpReturn, SPV_OC_OpReturnValue, - SPV_OC_OpUnreachable, SPV_OC_OpNoLine, SPV_OC_OpModuleProcessed, - SPV_OC_OpGroupNonUniformElect, SPV_OC_OpGroupNonUniformBallot, - SPV_OC_OpGroupNonUniformIAdd, SPV_OC_OpGroupNonUniformFAdd, - SPV_OC_OpGroupNonUniformIMul, SPV_OC_OpGroupNonUniformFMul, - SPV_OC_OpGroupNonUniformSMin, SPV_OC_OpGroupNonUniformUMin, - SPV_OC_OpGroupNonUniformFMin, SPV_OC_OpGroupNonUniformSMax, - SPV_OC_OpGroupNonUniformUMax, SPV_OC_OpGroupNonUniformFMax, - SPV_OC_OpSubgroupBallotKHR, SPV_OC_OpTypeCooperativeMatrixNV, - SPV_OC_OpCooperativeMatrixLoadNV, SPV_OC_OpCooperativeMatrixStoreNV, - SPV_OC_OpCooperativeMatrixMulAddNV, SPV_OC_OpCooperativeMatrixLengthNV + SPV_OC_OpSNegate, SPV_OC_OpFNegate, SPV_OC_OpIAdd, SPV_OC_OpFAdd, + SPV_OC_OpISub, SPV_OC_OpFSub, SPV_OC_OpIMul, SPV_OC_OpFMul, SPV_OC_OpUDiv, + SPV_OC_OpSDiv, SPV_OC_OpFDiv, 
SPV_OC_OpUMod, SPV_OC_OpSRem, SPV_OC_OpSMod, + SPV_OC_OpFRem, SPV_OC_OpFMod, SPV_OC_OpMatrixTimesScalar, + SPV_OC_OpMatrixTimesMatrix, SPV_OC_OpLogicalEqual, SPV_OC_OpLogicalNotEqual, + SPV_OC_OpLogicalOr, SPV_OC_OpLogicalAnd, SPV_OC_OpLogicalNot, SPV_OC_OpSelect, + SPV_OC_OpIEqual, SPV_OC_OpINotEqual, SPV_OC_OpUGreaterThan, + SPV_OC_OpSGreaterThan, SPV_OC_OpUGreaterThanEqual, SPV_OC_OpSGreaterThanEqual, + SPV_OC_OpULessThan, SPV_OC_OpSLessThan, SPV_OC_OpULessThanEqual, + SPV_OC_OpSLessThanEqual, SPV_OC_OpFOrdEqual, SPV_OC_OpFUnordEqual, + SPV_OC_OpFOrdNotEqual, SPV_OC_OpFUnordNotEqual, SPV_OC_OpFOrdLessThan, + SPV_OC_OpFUnordLessThan, SPV_OC_OpFOrdGreaterThan, SPV_OC_OpFUnordGreaterThan, + SPV_OC_OpFOrdLessThanEqual, SPV_OC_OpFUnordLessThanEqual, + SPV_OC_OpFOrdGreaterThanEqual, SPV_OC_OpFUnordGreaterThanEqual, + SPV_OC_OpShiftRightLogical, SPV_OC_OpShiftRightArithmetic, + SPV_OC_OpShiftLeftLogical, SPV_OC_OpBitwiseOr, SPV_OC_OpBitwiseXor, + SPV_OC_OpBitwiseAnd, SPV_OC_OpNot, SPV_OC_OpBitFieldInsert, + SPV_OC_OpBitFieldSExtract, SPV_OC_OpBitFieldUExtract, SPV_OC_OpBitReverse, + SPV_OC_OpBitCount, SPV_OC_OpControlBarrier, SPV_OC_OpMemoryBarrier, + SPV_OC_OpAtomicCompareExchangeWeak, SPV_OC_OpAtomicIIncrement, + SPV_OC_OpAtomicIDecrement, SPV_OC_OpAtomicIAdd, SPV_OC_OpAtomicISub, + SPV_OC_OpAtomicSMin, SPV_OC_OpAtomicUMin, SPV_OC_OpAtomicSMax, + SPV_OC_OpAtomicUMax, SPV_OC_OpAtomicAnd, SPV_OC_OpAtomicOr, SPV_OC_OpAtomicXor, + SPV_OC_OpPhi, SPV_OC_OpLoopMerge, SPV_OC_OpSelectionMerge, SPV_OC_OpLabel, + SPV_OC_OpBranch, SPV_OC_OpBranchConditional, SPV_OC_OpReturn, + SPV_OC_OpReturnValue, SPV_OC_OpUnreachable, SPV_OC_OpNoLine, + SPV_OC_OpModuleProcessed, SPV_OC_OpGroupNonUniformElect, + SPV_OC_OpGroupNonUniformBallot, SPV_OC_OpGroupNonUniformIAdd, + SPV_OC_OpGroupNonUniformFAdd, SPV_OC_OpGroupNonUniformIMul, + SPV_OC_OpGroupNonUniformFMul, SPV_OC_OpGroupNonUniformSMin, + SPV_OC_OpGroupNonUniformUMin, SPV_OC_OpGroupNonUniformFMin, + SPV_OC_OpGroupNonUniformSMax, 
SPV_OC_OpGroupNonUniformUMax, + SPV_OC_OpGroupNonUniformFMax, SPV_OC_OpSubgroupBallotKHR, + SPV_OC_OpTypeCooperativeMatrixNV, SPV_OC_OpCooperativeMatrixLoadNV, + SPV_OC_OpCooperativeMatrixStoreNV, SPV_OC_OpCooperativeMatrixMulAddNV, + SPV_OC_OpCooperativeMatrixLengthNV ]>; // End opcode section. Generated from SPIR-V spec; DO NOT MODIFY! diff --git a/mlir/include/mlir/Dialect/SPIRV/SPIRVLogicalOps.td b/mlir/include/mlir/Dialect/SPIRV/SPIRVLogicalOps.td index e1b477126a02f..9789122809ec6 100644 --- a/mlir/include/mlir/Dialect/SPIRV/SPIRVLogicalOps.td +++ b/mlir/include/mlir/Dialect/SPIRV/SPIRVLogicalOps.td @@ -26,6 +26,12 @@ class SPV_LogicalBinaryOp { let parser = [{ return ::parseLogicalBinaryOp(parser, result); }]; let printer = [{ return ::printLogicalOp(getOperation(), p); }]; + + let builders = [ + OpBuilder< + "OpBuilder &builder, OperationState &state, Value lhs, Value rhs", + "::buildLogicalBinaryOp(builder, state, lhs, rhs);"> + ]; } class SPV_LogicalUnaryOp(loc, type, lhs); + Value rhsAbs = builder.create(loc, type, rhs); + Value abs = builder.create(loc, lhsAbs, rhsAbs); + + // Fix the sign. + Value isPositive; + if (lhs == signOperand) + isPositive = builder.create(loc, lhs, lhsAbs); + else + isPositive = builder.create(loc, rhs, rhsAbs); + Value absNegate = builder.create(loc, type, abs); + return builder.create(loc, type, isPositive, abs, absNegate); +} + /// Returns the offset of the value in `targetBits` representation. `srcIdx` is /// an index into a 1-D array with each element having `sourceBits`. When /// accessing an element in the array treating as having elements of @@ -308,6 +337,19 @@ class UnaryAndBinaryOpPattern final : public SPIRVOpLowering { } }; +/// Converts std.remi_signed to SPIR-V ops. +/// +/// This cannot be merged into the template unary/binary pattern due to +/// Vulkan restrictions over spv.SRem and spv.SMod. 
+class SignedRemIOpPattern final : public SPIRVOpLowering { +public: + using SPIRVOpLowering::SPIRVOpLowering; + + LogicalResult + matchAndRewrite(SignedRemIOp remOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const override; +}; + /// Converts bitwise standard operations to SPIR-V operations. This is a special /// pattern other than the BinaryOpPatternPattern because if the operands are /// boolean values, SPIR-V uses different operations (`SPIRVLogicalOp`). For @@ -506,6 +548,20 @@ class XOrOpPattern final : public SPIRVOpLowering { } // namespace +//===----------------------------------------------------------------------===// +// SignedRemIOpPattern +//===----------------------------------------------------------------------===// + +LogicalResult SignedRemIOpPattern::matchAndRewrite( + SignedRemIOp remOp, ArrayRef operands, + ConversionPatternRewriter &rewriter) const { + Value result = emulateSignedRemainder(remOp.getLoc(), operands[0], + operands[1], operands[0], rewriter); + rewriter.replaceOp(remOp, result); + + return success(); +} + //===----------------------------------------------------------------------===// // ConstantOp with composite type. 
//===----------------------------------------------------------------------===// @@ -1005,6 +1061,9 @@ void populateStandardToSPIRVPatterns(MLIRContext *context, SPIRVTypeConverter &typeConverter, OwningRewritePatternList &patterns) { patterns.insert< + // Unary and binary patterns + BitwiseOpPattern, + BitwiseOpPattern, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, @@ -1020,7 +1079,6 @@ void populateStandardToSPIRVPatterns(MLIRContext *context, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, - UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, @@ -1031,19 +1089,28 @@ void populateStandardToSPIRVPatterns(MLIRContext *context, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, UnaryAndBinaryOpPattern, - AllocOpPattern, DeallocOpPattern, - BitwiseOpPattern, - BitwiseOpPattern, - BoolCmpIOpPattern, ConstantCompositeOpPattern, ConstantScalarOpPattern, - CmpFOpPattern, CmpIOpPattern, IntLoadOpPattern, LoadOpPattern, - ReturnOpPattern, SelectOpPattern, IntStoreOpPattern, StoreOpPattern, + SignedRemIOpPattern, XOrOpPattern, + + // Comparison patterns + BoolCmpIOpPattern, CmpFOpPattern, CmpIOpPattern, + + // Constant patterns + ConstantCompositeOpPattern, ConstantScalarOpPattern, + + // Memory patterns + AllocOpPattern, DeallocOpPattern, IntLoadOpPattern, IntStoreOpPattern, + LoadOpPattern, StoreOpPattern, + + ReturnOpPattern, SelectOpPattern, + + // Type cast patterns ZeroExtendI1Pattern, TypeCastingOpPattern, TypeCastingOpPattern, TypeCastingOpPattern, TypeCastingOpPattern, TypeCastingOpPattern, TypeCastingOpPattern, - TypeCastingOpPattern, XOrOpPattern>( - context, typeConverter); + TypeCastingOpPattern>(context, + typeConverter); } } // namespace mlir diff --git a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp index 6c8319224974a..9d0570257d422 100644 --- a/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp +++ b/mlir/lib/Dialect/SPIRV/SPIRVOps.cpp @@ -844,6 +844,18 @@ static 
LogicalResult verifyShiftOp(Operation *op) { return success(); } +static void buildLogicalBinaryOp(OpBuilder &builder, OperationState &state, + Value lhs, Value rhs) { + assert(lhs.getType() == rhs.getType()); + + Type boolType = builder.getI1Type(); + if (auto vecType = lhs.getType().dyn_cast()) + boolType = VectorType::get(vecType.getShape(), boolType); + state.addTypes(boolType); + + state.addOperands({lhs, rhs}); +} + //===----------------------------------------------------------------------===// // spv.AccessChainOp //===----------------------------------------------------------------------===// diff --git a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir index c232395d80db5..a93bf792b34f3 100644 --- a/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir +++ b/mlir/test/Conversion/StandardToSPIRV/std-ops-to-spirv.mlir @@ -22,12 +22,23 @@ func @int32_scalar(%lhs: i32, %rhs: i32) { %2 = muli %lhs, %rhs: i32 // CHECK: spv.SDiv %{{.*}}, %{{.*}}: i32 %3 = divi_signed %lhs, %rhs: i32 - // CHECK: spv.SRem %{{.*}}, %{{.*}}: i32 - %4 = remi_signed %lhs, %rhs: i32 // CHECK: spv.UDiv %{{.*}}, %{{.*}}: i32 - %5 = divi_unsigned %lhs, %rhs: i32 + %4 = divi_unsigned %lhs, %rhs: i32 // CHECK: spv.UMod %{{.*}}, %{{.*}}: i32 - %6 = remi_unsigned %lhs, %rhs: i32 + %5 = remi_unsigned %lhs, %rhs: i32 + return +} + +// CHECK-LABEL: @scalar_srem +// CHECK-SAME: (%[[LHS:.+]]: i32, %[[RHS:.+]]: i32) +func @scalar_srem(%lhs: i32, %rhs: i32) { + // CHECK: %[[LABS:.+]] = spv.GLSL.SAbs %[[LHS]] : i32 + // CHECK: %[[RABS:.+]] = spv.GLSL.SAbs %[[RHS]] : i32 + // CHECK: %[[ABS:.+]] = spv.UMod %[[LABS]], %[[RABS]] : i32 + // CHECK: %[[POS:.+]] = spv.IEqual %[[LHS]], %[[LABS]] : i32 + // CHECK: %[[NEG:.+]] = spv.SNegate %[[ABS]] : i32 + // CHECK: %{{.+}} = spv.Select %[[POS]], %[[ABS]], %[[NEG]] : i1, i32 + %0 = remi_signed %lhs, %rhs: i32 return } @@ -75,13 +86,24 @@ func @float32_binary_scalar(%lhs: f32, %rhs: 
f32) { // Check int vector types. // CHECK-LABEL: @int_vector234 -func @int_vector234(%arg0: vector<2xi8>, %arg1: vector<3xi16>, %arg2: vector<4xi64>) { +func @int_vector234(%arg0: vector<2xi8>, %arg1: vector<4xi64>) { // CHECK: spv.SDiv %{{.*}}, %{{.*}}: vector<2xi8> %0 = divi_signed %arg0, %arg0: vector<2xi8> - // CHECK: spv.SRem %{{.*}}, %{{.*}}: vector<3xi16> - %1 = remi_signed %arg1, %arg1: vector<3xi16> // CHECK: spv.UDiv %{{.*}}, %{{.*}}: vector<4xi64> - %2 = divi_unsigned %arg2, %arg2: vector<4xi64> + %1 = divi_unsigned %arg1, %arg1: vector<4xi64> + return +} + +// CHECK-LABEL: @vector_srem +// CHECK-SAME: (%[[LHS:.+]]: vector<3xi16>, %[[RHS:.+]]: vector<3xi16>) +func @vector_srem(%arg0: vector<3xi16>, %arg1: vector<3xi16>) { + // CHECK: %[[LABS:.+]] = spv.GLSL.SAbs %[[LHS]] : vector<3xi16> + // CHECK: %[[RABS:.+]] = spv.GLSL.SAbs %[[RHS]] : vector<3xi16> + // CHECK: %[[ABS:.+]] = spv.UMod %[[LABS]], %[[RABS]] : vector<3xi16> + // CHECK: %[[POS:.+]] = spv.IEqual %[[LHS]], %[[LABS]] : vector<3xi16> + // CHECK: %[[NEG:.+]] = spv.SNegate %[[ABS]] : vector<3xi16> + // CHECK: %{{.+}} = spv.Select %[[POS]], %[[ABS]], %[[NEG]] : vector<3xi1>, vector<3xi16> + %0 = remi_signed %arg0, %arg1: vector<3xi16> return } @@ -132,8 +154,8 @@ module attributes { func @int_vector23(%arg0: vector<2xi8>, %arg1: vector<3xi16>) { // CHECK: spv.SDiv %{{.*}}, %{{.*}}: vector<2xi32> %0 = divi_signed %arg0, %arg0: vector<2xi8> - // CHECK: spv.SRem %{{.*}}, %{{.*}}: vector<3xi32> - %1 = remi_signed %arg1, %arg1: vector<3xi16> + // CHECK: spv.SDiv %{{.*}}, %{{.*}}: vector<3xi32> + %1 = divi_signed %arg1, %arg1: vector<3xi16> return } diff --git a/mlir/test/Dialect/SPIRV/Serialization/arithmetic-ops.mlir b/mlir/test/Dialect/SPIRV/Serialization/arithmetic-ops.mlir index 55c67dafe6bba..9752c0d0e5799 100644 --- a/mlir/test/Dialect/SPIRV/Serialization/arithmetic-ops.mlir +++ b/mlir/test/Dialect/SPIRV/Serialization/arithmetic-ops.mlir @@ -71,6 +71,11 @@ spv.module Logical GLSL450 requires 
#spv.vce { %0 = spv.SMod %arg0, %arg1 : vector<4xi32> spv.Return } + spv.func @snegate(%arg0 : vector<4xi32>) "None" { + // CHECK: {{%.*}} = spv.SNegate {{%.*}} : vector<4xi32> + %0 = spv.SNegate %arg0 : vector<4xi32> + spv.Return + } spv.func @srem(%arg0 : vector<4xi32>, %arg1 : vector<4xi32>) "None" { // CHECK: {{%.*}} = spv.SRem {{%.*}}, {{%.*}} : vector<4xi32> %0 = spv.SRem %arg0, %arg1 : vector<4xi32> diff --git a/mlir/test/Dialect/SPIRV/arithmetic-ops.mlir b/mlir/test/Dialect/SPIRV/arithmetic-ops.mlir index 85998fb03efdd..de574b1510c9c 100644 --- a/mlir/test/Dialect/SPIRV/arithmetic-ops.mlir +++ b/mlir/test/Dialect/SPIRV/arithmetic-ops.mlir @@ -174,6 +174,17 @@ func @smod_scalar(%arg: i32) -> i32 { // ----- +//===----------------------------------------------------------------------===// +// spv.SNegate +//===----------------------------------------------------------------------===// + +func @snegate_scalar(%arg: i32) -> i32 { + // CHECK: spv.SNegate + %0 = spv.SNegate %arg : i32 + return %0 : i32 +} + +// ----- //===----------------------------------------------------------------------===// // spv.SRem //===----------------------------------------------------------------------===// From 540277d08440048c5f3239ff7bcc95a505142d82 Mon Sep 17 00:00:00 2001 From: LLVM GN Syncbot Date: Mon, 13 Jul 2020 20:18:59 +0000 Subject: [PATCH 149/771] [gn build] Port 9908a3b9f52 --- llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn | 1 - llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn | 1 - 2 files changed, 2 deletions(-) diff --git a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn index c13dc723ecd1b..11498ed602984 100644 --- a/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/Analysis/BUILD.gn @@ -55,7 +55,6 @@ static_library("Analysis") { "InlineAdvisor.cpp", "InlineCost.cpp", "InlineFeaturesAnalysis.cpp", - "InlineSizeEstimatorAnalysis.cpp", "InstCount.cpp", 
"InstructionPrecedenceTracking.cpp", "InstructionSimplify.cpp", diff --git a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn index 27733f63c2c50..b0dcd497d844e 100644 --- a/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/unittests/Analysis/BUILD.gn @@ -25,7 +25,6 @@ unittest("AnalysisTests") { "GlobalsModRefTest.cpp", "IVDescriptorsTest.cpp", "InlineFeaturesAnalysisTest.cpp", - "InlineSizeEstimatorAnalysisTest.cpp", "LazyCallGraphTest.cpp", "LoadsTest.cpp", "LoopInfoTest.cpp", From 2f23270af9bbe87859dc228eca63ccbc8986bebd Mon Sep 17 00:00:00 2001 From: Thomas Raoux Date: Mon, 13 Jul 2020 13:24:27 -0700 Subject: [PATCH 150/771] [mlir] Support operations with multiple results in slicing Right now slicing would assert if an operation with multiple results is in the slice. Differential Revision: https://reviews.llvm.org/D83627 --- mlir/lib/Analysis/SliceAnalysis.cpp | 36 +++++++++------------ mlir/test/Dialect/Affine/slicing-utils.mlir | 11 +++++++ 2 files changed, 27 insertions(+), 20 deletions(-) diff --git a/mlir/lib/Analysis/SliceAnalysis.cpp b/mlir/lib/Analysis/SliceAnalysis.cpp index a09fcf4bea068..8f5f87ba620ee 100644 --- a/mlir/lib/Analysis/SliceAnalysis.cpp +++ b/mlir/lib/Analysis/SliceAnalysis.cpp @@ -41,24 +41,23 @@ static void getForwardSliceImpl(Operation *op, } if (auto forOp = dyn_cast(op)) { - for (auto *ownerOp : forOp.getInductionVar().getUsers()) - if (forwardSlice->count(ownerOp) == 0) - getForwardSliceImpl(ownerOp, forwardSlice, filter); + for (Operation *userOp : forOp.getInductionVar().getUsers()) + if (forwardSlice->count(userOp) == 0) + getForwardSliceImpl(userOp, forwardSlice, filter); } else if (auto forOp = dyn_cast(op)) { - for (auto *ownerOp : forOp.getInductionVar().getUsers()) - if (forwardSlice->count(ownerOp) == 0) - getForwardSliceImpl(ownerOp, forwardSlice, filter); - for (auto result : forOp.getResults()) - for (auto 
*ownerOp : result.getUsers()) - if (forwardSlice->count(ownerOp) == 0) - getForwardSliceImpl(ownerOp, forwardSlice, filter); + for (Operation *userOp : forOp.getInductionVar().getUsers()) + if (forwardSlice->count(userOp) == 0) + getForwardSliceImpl(userOp, forwardSlice, filter); + for (Value result : forOp.getResults()) + for (Operation *userOp : result.getUsers()) + if (forwardSlice->count(userOp) == 0) + getForwardSliceImpl(userOp, forwardSlice, filter); } else { assert(op->getNumRegions() == 0 && "unexpected generic op with regions"); - assert(op->getNumResults() <= 1 && "unexpected multiple results"); - if (op->getNumResults() > 0) { - for (auto *ownerOp : op->getResult(0).getUsers()) - if (forwardSlice->count(ownerOp) == 0) - getForwardSliceImpl(ownerOp, forwardSlice, filter); + for (Value result : op->getResults()) { + for (Operation *userOp : result.getUsers()) + if (forwardSlice->count(userOp) == 0) + getForwardSliceImpl(userOp, forwardSlice, filter); } } @@ -172,12 +171,9 @@ struct DFSState { } // namespace static void DFSPostorder(Operation *current, DFSState *state) { - assert(current->getNumResults() <= 1 && "NYI: multi-result"); - if (current->getNumResults() > 0) { - for (auto &u : current->getResult(0).getUses()) { - auto *op = u.getOwner(); + for (Value result : current->getResults()) { + for (Operation *op : result.getUsers()) DFSPostorder(op, state); - } } bool inserted; using IterTy = decltype(state->seen.begin()); diff --git a/mlir/test/Dialect/Affine/slicing-utils.mlir b/mlir/test/Dialect/Affine/slicing-utils.mlir index 5cc0c3ddcdfb2..e11a66b0d0ebb 100644 --- a/mlir/test/Dialect/Affine/slicing-utils.mlir +++ b/mlir/test/Dialect/Affine/slicing-utils.mlir @@ -274,6 +274,17 @@ func @slicing_test_function_argument(%arg0: index) -> index { return %0 : index } +// FWD-LABEL: slicing_test_multiple_return +// BWD-LABEL: slicing_test_multiple_return +// FWDBWD-LABEL: slicing_test_multiple_return +func @slicing_test_multiple_return(%arg0: index) -> 
(index, index) { + // BWD: matched: {{.*}} (index, index) -> (index, index) backward static slice: + // FWD: matched: %{{.*}}:2 = "slicing-test-op"(%arg0, %arg0) : (index, index) -> (index, index) forward static slice: + // FWD: return %{{.*}}#0, %{{.*}}#1 : index, index + %0:2 = "slicing-test-op"(%arg0, %arg0): (index, index) -> (index, index) + return %0#0, %0#1 : index, index +} + // This test dumps 2 sets of outputs: first the test outputs themselves followed // by the module. These labels isolate the test outputs from the module dump. // FWD-LABEL: slicing_test From f630b8590f0c541ccc8133d84f9464c2b469dda3 Mon Sep 17 00:00:00 2001 From: AlexisPerry Date: Mon, 13 Jul 2020 10:32:12 -0600 Subject: [PATCH 151/771] [flang] Extended the flang driver options to include gfortran equivalents to pgf90 specific options. Summary: Added gfortran equivalents of pgf90's -Mfixed, -Mfree, -Mextend, -Mstandard, -Munlimited and also added -fdefault-double-8 Reviewers: sscalpone, richard.barton.arm, DavidTruby, clementval, jdoerfert Reviewed By: sscalpone, richard.barton.arm Subscribers: sstefan1, llvm-commits, flang-commits Tags: #llvm, #flang Differential Revision: https://reviews.llvm.org/D83687 --- flang/tools/f18/f18.cpp | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/flang/tools/f18/f18.cpp b/flang/tools/f18/f18.cpp index 26682eaa64897..05766a9c6a6dc 100644 --- a/flang/tools/f18/f18.cpp +++ b/flang/tools/f18/f18.cpp @@ -446,15 +446,17 @@ int main(int argc, char *const argv[]) { args.pop_front(); } break; - } else if (arg == "-Mfixed") { + } else if (arg == "-Mfixed" || arg == "-ffixed-form") { driver.forcedForm = true; options.isFixedForm = true; - } else if (arg == "-Mfree") { + } else if (arg == "-Mfree" || arg == "-ffree-form") { driver.forcedForm = true; options.isFixedForm = false; - } else if (arg == "-Mextend") { + } else if (arg == "-Mextend" || arg == "-ffixed-line-length-132") { options.fixedFormColumns = 
132; - } else if (arg == "-Munlimited") { + } else if (arg == "-Munlimited" || arg == "-ffree-line-length-none" || + arg == "-ffree-line-length-0" || arg == "-ffixed-line-length-none" || + arg == "-ffixed-line-length-0") { // For reparsing f18's -E output of fixed-form cooked character stream options.fixedFormColumns = 1000000; } else if (arg == "-Mbackslash") { @@ -463,7 +465,8 @@ int main(int argc, char *const argv[]) { } else if (arg == "-Mnobackslash") { options.features.Enable( Fortran::common::LanguageFeature::BackslashEscapes, true); - } else if (arg == "-Mstandard") { + } else if (arg == "-Mstandard" || arg == "-std=f95" || + arg == "-std=f2003" || arg == "-std=f2008" || arg == "-std=legacy") { driver.warnOnNonstandardUsage = true; } else if (arg == "-fopenmp") { options.features.Enable(Fortran::common::LanguageFeature::OpenMP); @@ -530,6 +533,8 @@ int main(int argc, char *const argv[]) { } else if (arg.substr(0, 2) == "-U") { options.predefinitions.emplace_back( arg.substr(2), std::optional{}); + } else if (arg == "-fdefault-double-8") { + defaultKinds.set_defaultRealKind(4); } else if (arg == "-r8" || arg == "-fdefault-real-8") { defaultKinds.set_defaultRealKind(8); } else if (arg == "-i8" || arg == "-fdefault-integer-8") { @@ -580,15 +585,17 @@ int main(int argc, char *const argv[]) { } else if (arg == "-help" || arg == "--help" || arg == "-?") { llvm::errs() << "f18 options:\n" - << " -Mfixed | -Mfree force the source form\n" - << " -Mextend 132-column fixed form\n" + << " -Mfixed | -Mfree | -ffixed-form | -ffree-form force the " + "source form\n" + << " -Mextend | -ffixed-line-length-132 132-column fixed form\n" << " -f[no-]backslash enable[disable] \\escapes in literals\n" << " -M[no]backslash disable[enable] \\escapes in literals\n" << " -Mstandard enable conformance warnings\n" + << " -std= enable conformance warnings\n" << " -fenable= enable a language feature\n" << " -fdisable= disable a language feature\n" - << " -r8 | -fdefault-real-8 | -i8 | 
-fdefault-integer-8 " - "change default kinds of intrinsic types\n" + << " -r8 | -fdefault-real-8 | -i8 | -fdefault-integer-8 | " + "-fdefault-double-8 change default kinds of intrinsic types\n" << " -Werror treat warnings as errors\n" << " -ed enable fixed form D lines\n" << " -E prescan & preprocess only\n" @@ -632,21 +639,26 @@ int main(int argc, char *const argv[]) { if (driver.warnOnNonstandardUsage) { options.features.WarnOnAllNonstandard(); } - if (options.features.IsEnabled(Fortran::common::LanguageFeature::OpenMP)) { - driver.pgf90Args.push_back("-mp"); - } if (isPGF90) { if (!options.features.IsEnabled( Fortran::common::LanguageFeature::BackslashEscapes)) { driver.pgf90Args.push_back( "-Mbackslash"); // yes, this *disables* them in pgf90 } + if (options.features.IsEnabled(Fortran::common::LanguageFeature::OpenMP)) { + driver.pgf90Args.push_back("-mp"); + } + Fortran::parser::useHexadecimalEscapeSequences = false; } else { if (options.features.IsEnabled( Fortran::common::LanguageFeature::BackslashEscapes)) { driver.pgf90Args.push_back("-fbackslash"); } + if (options.features.IsEnabled(Fortran::common::LanguageFeature::OpenMP)) { + driver.pgf90Args.push_back("-fopenmp"); + } + Fortran::parser::useHexadecimalEscapeSequences = true; } From 0d988da6d13e16a397d58bc3b965a36adb7fee03 Mon Sep 17 00:00:00 2001 From: Rahul Joshi Date: Mon, 13 Jul 2020 11:20:27 -0700 Subject: [PATCH 152/771] [MLIR] Change ODS collective params build method to provide an empty default value for named attributes - Provide default value for `ArrayRef attributes` parameter of the collective params build method. - Change the `genSeparateArgParamBuilder` function to not generate build methods that may be ambiguous with the new collective params build method. - This change should help eliminate passing empty NamedAttribue ArrayRef when the collective params build method is used - Extend op-decl.td unit test to make sure the ambiguous build methods are not generated. 
Differential Revision: https://reviews.llvm.org/D83517 --- .../ConvertStandardToSPIRV.cpp | 18 +++---- mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp | 7 ++- mlir/test/mlir-tblgen/op-decl.td | 50 +++++++++++++++++ mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 53 ++++++++++++++++--- 4 files changed, 108 insertions(+), 20 deletions(-) diff --git a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp index dad8bfc0173f2..e59830fcef89a 100644 --- a/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp +++ b/mlir/lib/Conversion/StandardToSPIRV/ConvertStandardToSPIRV.cpp @@ -331,8 +331,7 @@ class UnaryAndBinaryOpPattern final : public SPIRVOpLowering { return operation.emitError( "bitwidth emulation is not implemented yet on unsigned op"); } - rewriter.template replaceOpWithNewOp(operation, dstType, operands, - ArrayRef()); + rewriter.template replaceOpWithNewOp(operation, dstType, operands); return success(); } }; @@ -368,11 +367,11 @@ class BitwiseOpPattern final : public SPIRVOpLowering { if (!dstType) return failure(); if (isBoolScalarOrVector(operands.front().getType())) { - rewriter.template replaceOpWithNewOp( - operation, dstType, operands, ArrayRef()); + rewriter.template replaceOpWithNewOp(operation, dstType, + operands); } else { - rewriter.template replaceOpWithNewOp( - operation, dstType, operands, ArrayRef()); + rewriter.template replaceOpWithNewOp(operation, dstType, + operands); } return success(); } @@ -529,8 +528,8 @@ class TypeCastingOpPattern final : public SPIRVOpLowering { // Then we can just erase this operation by forwarding its operand. 
rewriter.replaceOp(operation, operands.front()); } else { - rewriter.template replaceOpWithNewOp( - operation, dstType, operands, ArrayRef()); + rewriter.template replaceOpWithNewOp(operation, dstType, + operands); } return success(); } @@ -1046,8 +1045,7 @@ XOrOpPattern::matchAndRewrite(XOrOp xorOp, ArrayRef operands, auto dstType = typeConverter.convertType(xorOp.getType()); if (!dstType) return failure(); - rewriter.replaceOpWithNewOp(xorOp, dstType, operands, - ArrayRef()); + rewriter.replaceOpWithNewOp(xorOp, dstType, operands); return success(); } diff --git a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp index 7aa26541ac279..c6a58a8dc5a80 100644 --- a/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp +++ b/mlir/lib/Target/LLVMIR/ConvertFromLLVMIR.cpp @@ -418,8 +418,7 @@ Value Importer::processConstant(llvm::Constant *c) { } if (auto *GV = dyn_cast(c)) return bEntry.create(UnknownLoc::get(context), - processGlobal(GV), - ArrayRef()); + processGlobal(GV)); if (auto *ce = dyn_cast(c)) { llvm::Instruction *i = ce->getAsInstruction(); @@ -727,7 +726,7 @@ LogicalResult Importer::processInstruction(llvm::Instruction *inst) { if (!calledValue) return failure(); ops.insert(ops.begin(), calledValue); - op = b.create(loc, tys, ops, ArrayRef()); + op = b.create(loc, tys, ops); } if (!ci->getType()->isVoidTy()) v = op->getResult(0); @@ -809,7 +808,7 @@ LogicalResult Importer::processInstruction(llvm::Instruction *inst) { Type type = processType(inst->getType()); if (!type) return failure(); - v = b.create(loc, type, ops, ArrayRef()); + v = b.create(loc, type, ops); return success(); } } diff --git a/mlir/test/mlir-tblgen/op-decl.td b/mlir/test/mlir-tblgen/op-decl.td index b596eee038291..f8ff60e355574 100644 --- a/mlir/test/mlir-tblgen/op-decl.td +++ b/mlir/test/mlir-tblgen/op-decl.td @@ -171,6 +171,56 @@ def NS_GOp : NS_Op<"op_with_fixed_return_type", []> { // CHECK-LABEL: class GOp : // CHECK: static ::mlir::LogicalResult 
inferReturnTypes +// Check default value for collective params builder. Check that other builders +// are generated as well. +def NS_HCollectiveParamsOp : NS_Op<"op_collective_params", []> { + let arguments = (ins AnyType:$a); + let results = (outs AnyType:$b); +} + +// CHECK_LABEL: class NS_HCollectiveParamsOp : +// CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::mlir::Type b, ::mlir::Value a); +// CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::llvm::ArrayRef<::mlir::Type> resultTypes, ::mlir::Value a); +// CHECK: static void build(::mlir::OpBuilder &, ::mlir::OperationState &odsState, ::llvm::ArrayRef<::mlir::Type> resultTypes, ::mlir::ValueRange operands, ::llvm::ArrayRef<::mlir::NamedAttribute> attributes = {}) + +// Check suppression of "separate arg, separate result" build method for an op +// with single variadic arg and single variadic result (since it will be +// ambiguous with the collective params build method). +def NS_HCollectiveParamsSuppress0Op : NS_Op<"op_collective_suppress0", []> { + let arguments = (ins Variadic:$a); + let results = (outs Variadic:$b); +} + +// CHECK_LABEL: class NS_HCollectiveParamsSuppress0Op : +// CHECK-NOT: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::llvm::ArrayRef<::mlir::Type> b, ::mlir::ValueRange a); +// CHECK: static void build(::mlir::OpBuilder &, ::mlir::OperationState &odsState, ::llvm::ArrayRef<::mlir::Type> resultTypes, ::mlir::ValueRange operands, ::llvm::ArrayRef<::mlir::NamedAttribute> attributes = {}); + +// Check suppression of "separate arg, collective result" build method for an op +// with single variadic arg and non variadic result (since it will be +// ambiguous with the collective params build method). 
+def NS_HCollectiveParamsSuppress1Op : NS_Op<"op_collective_suppress1", []> { + let arguments = (ins Variadic:$a); + let results = (outs I32:$b); +} + +// CHECK_LABEL: class NS_HCollectiveParamsSuppress1Op : +// CHECK-NOT: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::llvm::ArrayRef<::mlir::Type> b, ::mlir::ValueRange a); +// CHECK: static void build(::mlir::OpBuilder &, ::mlir::OperationState &odsState, ::llvm::ArrayRef<::mlir::Type> resultTypes, ::mlir::ValueRange operands, ::llvm::ArrayRef<::mlir::NamedAttribute> attributes = {}); + +// Check suppression of "separate arg, collective result" build method for an op +// with single variadic arg and > 1 variadic result (since it will be +// ambiguous with the collective params build method). Note that "separate arg, +// separate result" build method should be generated in this case as its not +// ambiguous with the collective params build method. +def NS_HCollectiveParamsSuppress2Op : NS_Op<"op_collective_suppress2", [SameVariadicResultSize]> { + let arguments = (ins Variadic:$a); + let results = (outs Variadic:$b, Variadic:$c); +} +// CHECK_LABEL: class NS_HCollectiveParamsSuppress2Op : +// CHECK: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::llvm::ArrayRef<::mlir::Type> b, ::llvm::ArrayRef<::mlir::Type> c, ::mlir::ValueRange a); +// CHECK-NOT: static void build(::mlir::OpBuilder &odsBuilder, ::mlir::OperationState &odsState, ::llvm::ArrayRef<::mlir::Type> b, ::mlir::ValueRange a); +// CHECK: static void build(::mlir::OpBuilder &, ::mlir::OperationState &odsState, ::llvm::ArrayRef<::mlir::Type> resultTypes, ::mlir::ValueRange operands, ::llvm::ArrayRef<::mlir::NamedAttribute> attributes = {}); + // Check that default builders can be suppressed. 
// --- diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index b2b4245989b58..5e009e6025243 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -955,14 +955,51 @@ void OpEmitter::genSeparateArgParamBuilder() { llvm_unreachable("unhandled TypeParamKind"); }; + // A separate arg param builder method will have a signature which is + // ambiguous with the collective params build method (generated in + // `genCollectiveParamBuilder` function below) if it has a single + // `ArrayReg` parameter for result types and a single `ArrayRef` + // parameter for the operands, no parameters after that, and the collective + // params build method has `attributes` as its last parameter (with + // a default value). This will happen when all of the following are true: + // 1. [`attributes` as last parameter in collective params build method]: + // getNumVariadicRegions must be 0 (otherwise the collective params build + // method ends with a `numRegions` param, and we don't specify default + // value for attributes). + // 2. [single `ArrayRef` parameter for operands, and no parameters + // after that]: numArgs() must be 1 (if not, each arg gets a separate param + // in the build methods generated here) and the single arg must be a + // non-attribute variadic argument. + // 3. [single `ArrayReg` parameter for result types]: + // 3a. paramKind should be Collective, or + // 3b. paramKind should be Separate and there should be a single variadic + // result + // + // In that case, skip generating such ambiguous build methods here. 
+ bool hasSingleVariadicResult = + op.getNumResults() == 1 && op.getResult(0).isVariadic(); + + bool hasSingleVariadicArg = + op.getNumArgs() == 1 && + op.getArg(0).is() && + op.getOperand(0).isVariadic(); + bool hasNoVariadicRegions = op.getNumVariadicRegions() == 0; + for (auto attrType : attrBuilderType) { - emit(attrType, TypeParamKind::Separate, /*inferType=*/false); + // Case 3b above. + if (!(hasNoVariadicRegions && hasSingleVariadicArg && + hasSingleVariadicResult)) + emit(attrType, TypeParamKind::Separate, /*inferType=*/false); if (canInferType(op)) emit(attrType, TypeParamKind::None, /*inferType=*/true); - // Emit separate arg build with collective type, unless there is only one - // variadic result, in which case the above would have already generated - // the same build method. - if (!(op.getNumResults() == 1 && op.getResult(0).isVariableLength())) + // The separate arg + collective param kind method will be: + // (a) Same as the separate arg + separate param kind method if there is + // only one variadic result. + // (b) Ambiguous with the collective params method under conditions in (3a) + // above. + // In either case, skip generating such build method. 
+ if (!hasSingleVariadicResult && + !(hasNoVariadicRegions && hasSingleVariadicArg)) emit(attrType, TypeParamKind::Collective, /*inferType=*/false); } } @@ -1184,8 +1221,12 @@ void OpEmitter::genCollectiveParamBuilder() { ", ::llvm::ArrayRef<::mlir::Type> resultTypes, ::mlir::ValueRange " "operands, " "::llvm::ArrayRef<::mlir::NamedAttribute> attributes"; - if (op.getNumVariadicRegions()) + if (op.getNumVariadicRegions()) { params += ", unsigned numRegions"; + } else { + // Provide default value for `attributes` since its the last parameter + params += " = {}"; + } auto &m = opClass.newMethod("void", "build", params, OpMethod::MP_Static); auto &body = m.body(); From 32d35fb74b2672ddf3674188423b71837afea8c4 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Mon, 13 Jul 2020 13:44:01 -0700 Subject: [PATCH 153/771] [lldb] Remove unused argument (NFC) Nobody is writing to the stream so there's no point in passing it around. --- lldb/include/lldb/Target/Process.h | 4 ++-- lldb/source/API/SBTarget.cpp | 2 +- .../Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp | 6 +++--- lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp | 2 +- lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.h | 3 +-- lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp | 3 +-- lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h | 2 +- lldb/source/Target/Platform.cpp | 2 +- lldb/source/Target/Process.cpp | 6 +++--- 9 files changed, 14 insertions(+), 16 deletions(-) diff --git a/lldb/include/lldb/Target/Process.h b/lldb/include/lldb/Target/Process.h index a3fb3447169e6..bf9b64547ed50 100644 --- a/lldb/include/lldb/Target/Process.h +++ b/lldb/include/lldb/Target/Process.h @@ -737,7 +737,7 @@ class Process : public std::enable_shared_from_this, /// /// \return /// Returns an error object. 
- virtual Status ConnectRemote(Stream *strm, llvm::StringRef remote_url); + virtual Status ConnectRemote(llvm::StringRef remote_url); bool GetShouldDetach() const { return m_should_detach; } @@ -925,7 +925,7 @@ class Process : public std::enable_shared_from_this, /// /// \return /// Returns an error object. - virtual Status DoConnectRemote(Stream *strm, llvm::StringRef remote_url) { + virtual Status DoConnectRemote(llvm::StringRef remote_url) { Status error; error.SetErrorString("remote connections are not supported"); return error; diff --git a/lldb/source/API/SBTarget.cpp b/lldb/source/API/SBTarget.cpp index ca75e91bd9069..b84e9f10fafe9 100644 --- a/lldb/source/API/SBTarget.cpp +++ b/lldb/source/API/SBTarget.cpp @@ -566,7 +566,7 @@ lldb::SBProcess SBTarget::ConnectRemote(SBListener &listener, const char *url, if (process_sp) { sb_process.SetSP(process_sp); - error.SetError(process_sp->ConnectRemote(nullptr, url)); + error.SetError(process_sp->ConnectRemote(url)); } else { error.SetErrorString("unable to create lldb_private::Process"); } diff --git a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp index 18631a0c53156..21bf7f4ac46d3 100644 --- a/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp +++ b/lldb/source/Plugins/Platform/gdb-server/PlatformRemoteGDBServer.cpp @@ -503,10 +503,10 @@ lldb::ProcessSP PlatformRemoteGDBServer::DebugProcess( "gdb-remote", nullptr); if (process_sp) { - error = process_sp->ConnectRemote(nullptr, connect_url.c_str()); + error = process_sp->ConnectRemote(connect_url.c_str()); // Retry the connect remote one time... 
if (error.Fail()) - error = process_sp->ConnectRemote(nullptr, connect_url.c_str()); + error = process_sp->ConnectRemote(connect_url.c_str()); if (error.Success()) error = process_sp->Launch(launch_info); else if (debugserver_pid != LLDB_INVALID_PROCESS_ID) { @@ -589,7 +589,7 @@ lldb::ProcessSP PlatformRemoteGDBServer::Attach( target->CreateProcess(attach_info.GetListenerForProcess(debugger), "gdb-remote", nullptr); if (process_sp) { - error = process_sp->ConnectRemote(nullptr, connect_url.c_str()); + error = process_sp->ConnectRemote(connect_url.c_str()); if (error.Success()) { ListenerSP listener_sp = attach_info.GetHijackListener(); if (listener_sp) diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp index 5b728a5f2960f..2f4a8917a78a7 100644 --- a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp +++ b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.cpp @@ -217,7 +217,7 @@ bool ProcessKDP::GetHostArchitecture(ArchSpec &arch) { return false; } -Status ProcessKDP::DoConnectRemote(Stream *strm, llvm::StringRef remote_url) { +Status ProcessKDP::DoConnectRemote(llvm::StringRef remote_url) { Status error; // Don't let any JIT happen when doing KDP as we can't allocate memory and we diff --git a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.h b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.h index 67f8ac0698204..52af56134404c 100644 --- a/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.h +++ b/lldb/source/Plugins/Process/MacOSX-Kernel/ProcessKDP.h @@ -67,8 +67,7 @@ class ProcessKDP : public lldb_private::Process { WillAttachToProcessWithName(const char *process_name, bool wait_for_launch) override; - lldb_private::Status DoConnectRemote(lldb_private::Stream *strm, - llvm::StringRef remote_url) override; + lldb_private::Status DoConnectRemote(llvm::StringRef remote_url) override; lldb_private::Status DoAttachToProcessWithID( lldb::pid_t pid, diff --git 
a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp index ff263fa162587..1fed8e0642670 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.cpp @@ -629,8 +629,7 @@ Status ProcessGDBRemote::WillAttachToProcessWithName(const char *process_name, return WillLaunchOrAttach(); } -Status ProcessGDBRemote::DoConnectRemote(Stream *strm, - llvm::StringRef remote_url) { +Status ProcessGDBRemote::DoConnectRemote(llvm::StringRef remote_url) { Log *log(ProcessGDBRemoteLog::GetLogIfAllCategoriesSet(GDBR_LOG_PROCESS)); Status error(WillLaunchOrAttach()); diff --git a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h index 22d86d6cdd75d..ba967727ae3b8 100644 --- a/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h +++ b/lldb/source/Plugins/Process/gdb-remote/ProcessGDBRemote.h @@ -85,7 +85,7 @@ class ProcessGDBRemote : public Process, Status WillAttachToProcessWithName(const char *process_name, bool wait_for_launch) override; - Status DoConnectRemote(Stream *strm, llvm::StringRef remote_url) override; + Status DoConnectRemote(llvm::StringRef remote_url) override; Status WillLaunchOrAttach(); diff --git a/lldb/source/Target/Platform.cpp b/lldb/source/Target/Platform.cpp index 8d4bea2148147..95c35ea826a07 100644 --- a/lldb/source/Target/Platform.cpp +++ b/lldb/source/Target/Platform.cpp @@ -1806,7 +1806,7 @@ lldb::ProcessSP Platform::ConnectProcess(llvm::StringRef connect_url, if (!process_sp) return nullptr; - error = process_sp->ConnectRemote(&debugger.GetOutputStream(), connect_url); + error = process_sp->ConnectRemote(connect_url); if (error.Fail()) return nullptr; diff --git a/lldb/source/Target/Process.cpp b/lldb/source/Target/Process.cpp index 78f75981a94dd..d777a27139119 100644 --- a/lldb/source/Target/Process.cpp +++ b/lldb/source/Target/Process.cpp 
@@ -1270,7 +1270,7 @@ void Process::UpdateThreadListIfNeeded() { for (size_t i = 0; i < num_old_threads; ++i) old_thread_list.GetThreadAtIndex(i, false)->ClearBackingThread(); // See if the OS plugin reports all threads. If it does, then - // it is safe to clear unseen thread's plans here. Otherwise we + // it is safe to clear unseen thread's plans here. Otherwise we // should preserve them in case they show up again: clear_unused_threads = GetOSPluginReportsAllThreads(); @@ -3096,14 +3096,14 @@ void Process::CompleteAttach() { } } -Status Process::ConnectRemote(Stream *strm, llvm::StringRef remote_url) { +Status Process::ConnectRemote(llvm::StringRef remote_url) { m_abi_sp.reset(); m_process_input_reader.reset(); // Find the process and its architecture. Make sure it matches the // architecture of the current Target, and if not adjust it. - Status error(DoConnectRemote(strm, remote_url)); + Status error(DoConnectRemote(remote_url)); if (error.Success()) { if (GetID() != LLDB_INVALID_PROCESS_ID) { EventSP event_sp; From 77c9aafc5d85a816c20d1f1fb176024bc0b8d0fe Mon Sep 17 00:00:00 2001 From: Walter Erquinigo Date: Mon, 13 Jul 2020 12:10:49 -0700 Subject: [PATCH 154/771] Retry ""[lldb-vscode] Fix TestVSCode_module"" Original commit c60216db15132401ff60c08ccef899321f63b6b6. The test can only run on Darwin because of how it was setup, so I'm enforcing that. 
Summary: Test Plan: Reviewers: Subscribers: Tasks: Tags: --- .../test/tools/lldb-vscode/vscode.py | 23 +++++ .../API/tools/lldb-vscode/module/Makefile | 13 +++ .../lldb-vscode/module/TestVSCode_module.py | 72 +++++++++++++++ .../test/API/tools/lldb-vscode/module/foo.cpp | 3 + lldb/test/API/tools/lldb-vscode/module/foo.h | 1 + .../API/tools/lldb-vscode/module/main.cpp | 6 ++ lldb/tools/lldb-vscode/JSONUtils.cpp | 44 +++++++++ lldb/tools/lldb-vscode/JSONUtils.h | 13 +++ lldb/tools/lldb-vscode/VSCode.cpp | 5 + lldb/tools/lldb-vscode/lldb-vscode.cpp | 92 +++++++++++++++++++ 10 files changed, 272 insertions(+) create mode 100644 lldb/test/API/tools/lldb-vscode/module/Makefile create mode 100644 lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py create mode 100644 lldb/test/API/tools/lldb-vscode/module/foo.cpp create mode 100644 lldb/test/API/tools/lldb-vscode/module/foo.h create mode 100644 lldb/test/API/tools/lldb-vscode/module/main.cpp diff --git a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py index 1ad168e794cff..6b1c1c961b545 100644 --- a/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py +++ b/lldb/packages/Python/lldbsuite/test/tools/lldb-vscode/vscode.py @@ -113,6 +113,7 @@ def __init__(self, recv, send, init_commands): self.initialize_body = None self.thread_stop_reasons = {} self.breakpoint_events = [] + self.module_events = {} self.sequence = 1 self.threads = None self.recv_thread.start() @@ -133,6 +134,9 @@ def validate_response(cls, command, response): if command['seq'] != response['request_seq']: raise ValueError('seq mismatch in response') + def get_active_modules(self): + return self.module_events + def get_output(self, category, timeout=0.0, clear=True): self.output_condition.acquire() output = None @@ -218,6 +222,15 @@ def handle_recv_packet(self, packet): self.breakpoint_events.append(packet) # no need to add 'breakpoint' event packets to our 
packets list return keepGoing + elif event == 'module': + reason = body['reason'] + if (reason == 'new' or reason == 'changed'): + self.module_events[body['module']['name']] = body['module'] + elif reason == 'removed': + if body['module']['name'] in self.module_events: + self.module_events.pop(body['module']['name']) + return keepGoing + elif packet_type == 'response': if packet['command'] == 'disconnect': keepGoing = False @@ -747,6 +760,16 @@ def request_setFunctionBreakpoints(self, names, condition=None, } return self.send_recv(command_dict) + def request_getCompileUnits(self, moduleId): + args_dict = {'moduleId': moduleId} + command_dict = { + 'command': 'getCompileUnits', + 'type': 'request', + 'arguments': args_dict + } + response = self.send_recv(command_dict) + return response + def request_completions(self, text): args_dict = { 'text': text, diff --git a/lldb/test/API/tools/lldb-vscode/module/Makefile b/lldb/test/API/tools/lldb-vscode/module/Makefile new file mode 100644 index 0000000000000..1fb944b138937 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/module/Makefile @@ -0,0 +1,13 @@ +DYLIB_NAME := foo +DYLIB_CXX_SOURCES := foo.cpp +CXX_SOURCES := main.cpp + +all: a.out.stripped + +include Makefile.rules + +a.out.stripped: a.out.dSYM + strip -o a.out.stripped a.out +ifneq "$(CODESIGN)" "" + $(CODESIGN) -fs - a.out.stripped +endif diff --git a/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py b/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py new file mode 100644 index 0000000000000..461ac201a73f4 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/module/TestVSCode_module.py @@ -0,0 +1,72 @@ +""" +Test lldb-vscode setBreakpoints request +""" + +from __future__ import print_function + +import unittest2 +import vscode +from lldbsuite.test.decorators import * +from lldbsuite.test.lldbtest import * +from lldbsuite.test import lldbutil +import lldbvscode_testcase + + +class TestVSCode_module(lldbvscode_testcase.VSCodeTestCaseBase): + + 
mydir = TestBase.compute_mydir(__file__) + + + @skipIfWindows + @skipUnlessDarwin + @skipIfRemote + def test_modules_event(self): + program_basename = "a.out.stripped" + program= self.getBuildArtifact(program_basename) + self.build_and_launch(program) + functions = ['foo'] + breakpoint_ids = self.set_function_breakpoints(functions) + self.assertEquals(len(breakpoint_ids), len(functions), + 'expect one breakpoint') + self.continue_to_breakpoints(breakpoint_ids) + active_modules = self.vscode.get_active_modules() + self.assertIn(program_basename, active_modules, '%s module is in active modules' % (program_basename)) + program_module = active_modules[program_basename] + self.assertIn('name', program_module, 'make sure name is in module') + self.assertEqual(program_basename, program_module['name']) + self.assertIn('path', program_module, 'make sure path is in module') + self.assertEqual(program, program_module['path']) + self.assertTrue('symbolFilePath' not in program_module, 'Make sure a.out.stripped has no debug info') + self.assertEqual('Symbols not found.', program_module['symbolStatus']) + symbol_path = self.getBuildArtifact("a.out") + self.vscode.request_evaluate('`%s' % ('target symbols add -s "%s" "%s"' % (program, symbol_path))) + active_modules = self.vscode.get_active_modules() + program_module = active_modules[program_basename] + self.assertEqual(program_basename, program_module['name']) + self.assertEqual(program, program_module['path']) + self.assertEqual('Symbols loaded.', program_module['symbolStatus']) + self.assertIn('symbolFilePath', program_module) + self.assertEqual(symbol_path, program_module['symbolFilePath']) + self.assertIn('addressRange', program_module) + + @skipIfWindows + @skipUnlessDarwin + @skipIfRemote + def test_compile_units(self): + program= self.getBuildArtifact("a.out") + self.build_and_launch(program) + source = "main.cpp" + main_source_path = self.getSourcePath(source) + breakpoint1_line = line_number(source, '// breakpoint 1') + 
lines = [breakpoint1_line] + breakpoint_ids = self.set_source_breakpoints(source, lines) + self.continue_to_breakpoints(breakpoint_ids) + moduleId = self.vscode.get_active_modules()['a.out']['id'] + response = self.vscode.request_getCompileUnits(moduleId) + print(response['body']) + self.assertTrue(response['body']) + self.assertTrue(len(response['body']['compileUnits']) == 1, + 'Only one source file should exist') + self.assertTrue(response['body']['compileUnits'][0]['compileUnitPath'] == main_source_path, + 'Real path to main.cpp matches') + diff --git a/lldb/test/API/tools/lldb-vscode/module/foo.cpp b/lldb/test/API/tools/lldb-vscode/module/foo.cpp new file mode 100644 index 0000000000000..9dba85a9cccab --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/module/foo.cpp @@ -0,0 +1,3 @@ +int foo() { + return 12; +} diff --git a/lldb/test/API/tools/lldb-vscode/module/foo.h b/lldb/test/API/tools/lldb-vscode/module/foo.h new file mode 100644 index 0000000000000..5d5f8f0c9e786 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/module/foo.h @@ -0,0 +1 @@ +int foo(); diff --git a/lldb/test/API/tools/lldb-vscode/module/main.cpp b/lldb/test/API/tools/lldb-vscode/module/main.cpp new file mode 100644 index 0000000000000..4ff2b2360eb97 --- /dev/null +++ b/lldb/test/API/tools/lldb-vscode/module/main.cpp @@ -0,0 +1,6 @@ +#include "foo.h" + +int main(int argc, char const *argv[]) { + foo(); + return 0; // breakpoint 1 +} diff --git a/lldb/tools/lldb-vscode/JSONUtils.cpp b/lldb/tools/lldb-vscode/JSONUtils.cpp index 8fcf179b29aad..86c29fb238112 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.cpp +++ b/lldb/tools/lldb-vscode/JSONUtils.cpp @@ -327,6 +327,41 @@ llvm::json::Value CreateBreakpoint(lldb::SBBreakpoint &bp, return llvm::json::Value(std::move(object)); } +llvm::json::Value CreateModule(lldb::SBModule &module) { + llvm::json::Object object; + if (!module.IsValid()) + return llvm::json::Value(std::move(object)); + object.try_emplace("id", std::string(module.GetUUIDString())); 
+ object.try_emplace("name", std::string(module.GetFileSpec().GetFilename())); + char module_path_arr[PATH_MAX]; + module.GetFileSpec().GetPath(module_path_arr, sizeof(module_path_arr)); + std::string module_path(module_path_arr); + object.try_emplace("path", module_path); + if (module.GetNumCompileUnits() > 0) { + object.try_emplace("symbolStatus", "Symbols loaded."); + char symbol_path_arr[PATH_MAX]; + module.GetSymbolFileSpec().GetPath(symbol_path_arr, sizeof(symbol_path_arr)); + std::string symbol_path(symbol_path_arr); + object.try_emplace("symbolFilePath", symbol_path); + } else { + object.try_emplace("symbolStatus", "Symbols not found."); + } + std::string loaded_addr = std::to_string( + module.GetObjectFileHeaderAddress().GetLoadAddress(g_vsc.target)); + object.try_emplace("addressRange", loaded_addr); + std::string version_str; + uint32_t version_nums[3]; + uint32_t num_versions = module.GetVersion(version_nums, sizeof(version_nums)/sizeof(uint32_t)); + for (uint32_t i=0; i request_path, llvm::Optional request_line) { @@ -902,4 +937,13 @@ llvm::json::Value CreateVariable(lldb::SBValue v, int64_t variablesReference, return llvm::json::Value(std::move(object)); } +llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit) { + llvm::json::Object object; + char unit_path_arr[PATH_MAX]; + unit.GetFileSpec().GetPath(unit_path_arr, sizeof(unit_path_arr)); + std::string unit_path(unit_path_arr); + object.try_emplace("compileUnitPath", unit_path); + return llvm::json::Value(std::move(object)); +} + } // namespace lldb_vscode diff --git a/lldb/tools/lldb-vscode/JSONUtils.h b/lldb/tools/lldb-vscode/JSONUtils.h index af76683d11cc8..e2ccfdb1fb2b6 100644 --- a/lldb/tools/lldb-vscode/JSONUtils.h +++ b/lldb/tools/lldb-vscode/JSONUtils.h @@ -13,6 +13,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/Support/JSON.h" #include "VSCodeForward.h" +#include "lldb/API/SBModule.h" namespace lldb_vscode { @@ -237,6 +238,16 @@ CreateBreakpoint(lldb::SBBreakpoint &bp, 
llvm::Optional request_path = llvm::None, llvm::Optional request_line = llvm::None); +/// Converts a LLDB module to a VS Code DAP module for use in "modules" events. +/// +/// \param[in] module +/// A LLDB module object to convert into a JSON value +/// +/// \return +/// A "Module" JSON object with that follows the formal JSON +/// definition outlined by Microsoft. +llvm::json::Value CreateModule(lldb::SBModule &module); + /// Create a "Event" JSON object using \a event_name as the event name /// /// \param[in] event_name @@ -430,6 +441,8 @@ llvm::json::Value CreateThreadStopped(lldb::SBThread &thread, uint32_t stop_id); llvm::json::Value CreateVariable(lldb::SBValue v, int64_t variablesReference, int64_t varID, bool format_hex); +llvm::json::Value CreateCompileUnit(lldb::SBCompileUnit unit); + } // namespace lldb_vscode #endif diff --git a/lldb/tools/lldb-vscode/VSCode.cpp b/lldb/tools/lldb-vscode/VSCode.cpp index b2d16f96d1f2b..4a30aef3a6db4 100644 --- a/lldb/tools/lldb-vscode/VSCode.cpp +++ b/lldb/tools/lldb-vscode/VSCode.cpp @@ -358,6 +358,11 @@ void VSCode::SetTarget(const lldb::SBTarget target) { lldb::SBTarget::eBroadcastBitBreakpointChanged); listener.StartListeningForEvents(this->broadcaster, eBroadcastBitStopEventThread); + listener.StartListeningForEvents( + this->target.GetBroadcaster(), + lldb::SBTarget::eBroadcastBitModulesLoaded | + lldb::SBTarget::eBroadcastBitModulesUnloaded | + lldb::SBTarget::eBroadcastBitSymbolsLoaded); } } diff --git a/lldb/tools/lldb-vscode/lldb-vscode.cpp b/lldb/tools/lldb-vscode/lldb-vscode.cpp index 168873f827527..27ee832677d72 100644 --- a/lldb/tools/lldb-vscode/lldb-vscode.cpp +++ b/lldb/tools/lldb-vscode/lldb-vscode.cpp @@ -39,6 +39,7 @@ #include #include #include +#include #include "llvm/ADT/ArrayRef.h" #include "llvm/Option/Arg.h" @@ -434,6 +435,30 @@ void EventThreadFunction() { g_vsc.SendJSON(llvm::json::Value(std::move(bp_event))); } } + } else if (lldb::SBTarget::EventIsTargetEvent(event)) { + if (event_mask & 
lldb::SBTarget::eBroadcastBitModulesLoaded || + event_mask & lldb::SBTarget::eBroadcastBitModulesUnloaded || + event_mask & lldb::SBTarget::eBroadcastBitSymbolsLoaded) { + int num_modules = lldb::SBTarget::GetNumModulesFromEvent(event); + for (int i = 0; i < num_modules; i++) { + auto module = lldb::SBTarget::GetModuleAtIndexFromEvent(i, event); + auto module_event = CreateEventObject("module"); + llvm::json::Value module_value = CreateModule(module); + llvm::json::Object body; + if (event_mask & lldb::SBTarget::eBroadcastBitModulesLoaded) { + body.try_emplace("reason", "new"); + } else if (event_mask & + lldb::SBTarget::eBroadcastBitModulesUnloaded) { + body.try_emplace("reason", "removed"); + } else if (event_mask & + lldb::SBTarget::eBroadcastBitSymbolsLoaded) { + body.try_emplace("reason", "changed"); + } + body.try_emplace("module", module_value); + module_event.try_emplace("body", std::move(body)); + g_vsc.SendJSON(llvm::json::Value(std::move(module_event))); + } + } } else if (event.BroadcasterMatchesRef(g_vsc.broadcaster)) { if (event_mask & eBroadcastBitStopEventThread) { done = true; @@ -1149,6 +1174,72 @@ void request_evaluate(const llvm::json::Object &request) { g_vsc.SendJSON(llvm::json::Value(std::move(response))); } +// "getCompileUnitsRequest": { +// "allOf": [ { "$ref": "#/definitions/Request" }, { +// "type": "object", +// "description": "Compile Unit request; value of command field is +// 'getCompileUnits'.", +// "properties": { +// "command": { +// "type": "string", +// "enum": [ "getCompileUnits" ] +// }, +// "arguments": { +// "$ref": "#/definitions/getCompileUnitRequestArguments" +// } +// }, +// "required": [ "command", "arguments" ] +// }] +// }, +// "getCompileUnitsRequestArguments": { +// "type": "object", +// "description": "Arguments for 'getCompileUnits' request.", +// "properties": { +// "moduleId": { +// "type": "string", +// "description": "The ID of the module." 
+// } +// }, +// "required": [ "moduleId" ] +// }, +// "getCompileUnitsResponse": { +// "allOf": [ { "$ref": "#/definitions/Response" }, { +// "type": "object", +// "description": "Response to 'getCompileUnits' request.", +// "properties": { +// "body": { +// "description": "Response to 'getCompileUnits' request. Array of +// paths of compile units." +// } +// } +// }] +// } + +void request_getCompileUnits(const llvm::json::Object &request) { + llvm::json::Object response; + FillResponse(request, response); + lldb::SBProcess process = g_vsc.target.GetProcess(); + llvm::json::Object body; + llvm::json::Array units; + auto arguments = request.getObject("arguments"); + std::string module_id = std::string(GetString(arguments, "moduleId")); + int num_modules = g_vsc.target.GetNumModules(); + for (int i = 0; i < num_modules; i++) { + auto curr_module = g_vsc.target.GetModuleAtIndex(i); + if (module_id == curr_module.GetUUIDString()) { + int num_units = curr_module.GetNumCompileUnits(); + for (int j = 0; j < num_units; j++) { + auto curr_unit = curr_module.GetCompileUnitAtIndex(j);\ + units.emplace_back(CreateCompileUnit(curr_unit));\ + } + body.try_emplace("compileUnits", std::move(units)); + break; + } + } + response.try_emplace("body", std::move(body)); + g_vsc.SendJSON(llvm::json::Value(std::move(response))); +} + // "InitializeRequest": { // "allOf": [ { "$ref": "#/definitions/Request" }, { // "type": "object", @@ -2734,6 +2825,7 @@ const std::map &GetRequestHandlers() { REQUEST_CALLBACK(disconnect), REQUEST_CALLBACK(evaluate), REQUEST_CALLBACK(exceptionInfo), + REQUEST_CALLBACK(getCompileUnits), REQUEST_CALLBACK(initialize), REQUEST_CALLBACK(launch), REQUEST_CALLBACK(next), From b9c2dd11a5139b754afca050effac80f3b638bc8 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Fri, 19 Jun 2020 20:25:33 +0200 Subject: [PATCH 155/771] [ADT] Specialize std::swap() for SetVector This is intended to address a compile-time regression from 1eddce4177cfddc86d4696b758904443b0b4f193. 
A SmallPtrSet was replaced with a SetVector there, which had an unexpected large compile-time impact. It turns out that this structure is getting swapped a lot, and previously this used an optimized std::swap() specialization for SmallPtrSet. Now it ends up using the default, triple-move based implementation, which is much more expensive. This patch (partly) addresses the issue by specializing std::swap() for SetVector. Differential Revision: https://reviews.llvm.org/D82230 --- llvm/include/llvm/ADT/SetVector.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/llvm/include/llvm/ADT/SetVector.h b/llvm/include/llvm/ADT/SetVector.h index 901d5b1bcd90f..91ad72143ed35 100644 --- a/llvm/include/llvm/ADT/SetVector.h +++ b/llvm/include/llvm/ADT/SetVector.h @@ -263,6 +263,11 @@ class SetVector { remove(*SI); } + void swap(SetVector &RHS) { + set_.swap(RHS.set_); + vector_.swap(RHS.vector_); + } + private: /// A wrapper predicate designed for use with std::remove_if. /// @@ -308,4 +313,22 @@ class SmallSetVector } // end namespace llvm +namespace std { + +/// Implement std::swap in terms of SetVector swap. +template +inline void +swap(llvm::SetVector &LHS, llvm::SetVector &RHS) { + LHS.swap(RHS); +} + +/// Implement std::swap in terms of SmallSetVector swap. +template +inline void +swap(llvm::SmallSetVector &LHS, llvm::SmallSetVector &RHS) { + LHS.swap(RHS); +} + +} // end namespace std + #endif // LLVM_ADT_SETVECTOR_H From affbc0cd1cc87826c2636f8903d85c911aef75ff Mon Sep 17 00:00:00 2001 From: Nicolas Vasilache Date: Mon, 13 Jul 2020 11:04:09 -0400 Subject: [PATCH 156/771] [mlir] Add alignment attribute to LLVM memory ops and use in vector.transfer Summary: The native alignment may generally not be used when lowering a vector.transfer to the underlying load/store operation. This revision fixes the unmasked load/store alignment to match that of the masked path. 
Differential Revision: https://reviews.llvm.org/D83684 --- mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td | 88 ++++++++++++++----- .../Vector/CPU/test-transfer-read.mlir | 13 +++ .../Vector/CPU/test-transfer-write.mlir | 16 ++-- .../VectorToLLVM/ConvertVectorToLLVM.cpp | 11 ++- .../VectorToLLVM/vector-to-llvm.mlir | 2 +- 5 files changed, 97 insertions(+), 33 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td index 663a820905ce3..ce0b3de82d2c9 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMOps.td @@ -215,19 +215,36 @@ def LLVM_FDivOp : LLVM_ArithmeticOp<"fdiv", "CreateFDiv">; def LLVM_FRemOp : LLVM_ArithmeticOp<"frem", "CreateFRem">; def LLVM_FNegOp : LLVM_UnaryArithmeticOp<"fneg", "CreateFNeg">; +// Common code definition that is used to verify and set the alignment attribute +// of LLVM ops that accept such an attribute. +class MemoryOpWithAlignmentBase { + code alignmentVerifierCode = [{ + if (alignment().hasValue()) { + auto align = alignment().getValue().getSExtValue(); + if (align < 0) + return emitOpError("expected positive alignment"); + } + return success(); + }]; + code setAlignmentCode = [{ + if ($alignment.hasValue()) { + auto align = $alignment.getValue().getZExtValue(); + if (align != 0) + inst->setAlignment(llvm::Align(align)); + } + }]; +} + // Memory-related operations. 
def LLVM_AllocaOp : + MemoryOpWithAlignmentBase, LLVM_OneResultOp<"alloca">, Arguments<(ins LLVM_Type:$arraySize, OptionalAttr:$alignment)> { string llvmBuilder = [{ - auto *alloca = builder.CreateAlloca( + auto *inst = builder.CreateAlloca( $_resultType->getPointerElementType(), $arraySize); - if ($alignment.hasValue()) { - auto align = $alignment.getValue().getZExtValue(); - if (align != 0) - alloca->setAlignment(llvm::Align(align)); - } - $res = alloca; + }] # setAlignmentCode # [{ + $res = inst; }]; let builders = [OpBuilder< "OpBuilder &b, OperationState &result, Type resultType, Value arraySize, " @@ -239,14 +256,7 @@ def LLVM_AllocaOp : }]>]; let parser = [{ return parseAllocaOp(parser, result); }]; let printer = [{ printAllocaOp(p, *this); }]; - let verifier = [{ - if (alignment().hasValue()) { - auto align = alignment().getValue().getSExtValue(); - if (align < 0) - return emitOpError("expected positive alignment"); - } - return success(); - }]; + let verifier = alignmentVerifierCode; } def LLVM_GEPOp : LLVM_OneResultOp<"getelementptr", [NoSideEffect]>, Arguments<(ins LLVM_Type:$base, Variadic:$indices)>, @@ -255,22 +265,56 @@ def LLVM_GEPOp : LLVM_OneResultOp<"getelementptr", [NoSideEffect]>, $base `[` $indices `]` attr-dict `:` functional-type(operands, results) }]; } -def LLVM_LoadOp : LLVM_OneResultOp<"load">, Arguments<(ins LLVM_Type:$addr)>, - LLVM_Builder<"$res = builder.CreateLoad($addr);"> { +def LLVM_LoadOp : + MemoryOpWithAlignmentBase, + LLVM_OneResultOp<"load">, + Arguments<(ins LLVM_Type:$addr, OptionalAttr:$alignment)> { + string llvmBuilder = [{ + auto *inst = builder.CreateLoad($addr); + }] # setAlignmentCode # [{ + $res = inst; + }]; let builders = [OpBuilder< - "OpBuilder &b, OperationState &result, Value addr", + "OpBuilder &b, OperationState &result, Value addr, unsigned alignment = 0", [{ auto type = addr.getType().cast().getPointerElementTy(); - build(b, result, type, addr); + build(b, result, type, addr, alignment); + }]>, + 
OpBuilder< + "OpBuilder &b, OperationState &result, Type t, Value addr, " + "unsigned alignment = 0", + [{ + if (alignment == 0) + return build(b, result, t, addr, IntegerAttr()); + build(b, result, t, addr, b.getI64IntegerAttr(alignment)); }]>]; let parser = [{ return parseLoadOp(parser, result); }]; let printer = [{ printLoadOp(p, *this); }]; + let verifier = alignmentVerifierCode; } -def LLVM_StoreOp : LLVM_ZeroResultOp<"store">, - Arguments<(ins LLVM_Type:$value, LLVM_Type:$addr)>, - LLVM_Builder<"builder.CreateStore($value, $addr);"> { +def LLVM_StoreOp : + MemoryOpWithAlignmentBase, + LLVM_ZeroResultOp<"store">, + Arguments<(ins LLVM_Type:$value, + LLVM_Type:$addr, + OptionalAttr:$alignment)> { + string llvmBuilder = [{ + auto *inst = builder.CreateStore($value, $addr); + }] # setAlignmentCode; + let builders = [ + OpBuilder< + "OpBuilder &b, OperationState &result, Value value, Value addr, " + "unsigned alignment = 0", + [{ + if (alignment == 0) + return build(b, result, ArrayRef{}, value, addr, IntegerAttr()); + build(b, result, ArrayRef{}, value, addr, + b.getI64IntegerAttr(alignment)); + }] + >]; let parser = [{ return parseStoreOp(parser, result); }]; let printer = [{ printStoreOp(p, *this); }]; + let verifier = alignmentVerifierCode; } // Casts. 
diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read.mlir index f8934f06c0fd7..e6fa0df1ed7e3 100644 --- a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read.mlir +++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-read.mlir @@ -12,6 +12,15 @@ func @transfer_read_1d(%A : memref, %base: index) { return } +func @transfer_read_unmasked_4(%A : memref, %base: index) { + %fm42 = constant -42.0: f32 + %f = vector.transfer_read %A[%base], %fm42 + {permutation_map = affine_map<(d0) -> (d0)>, masked = [false]} : + memref, vector<4xf32> + vector.print %f: vector<4xf32> + return +} + func @transfer_write_1d(%A : memref, %base: index) { %f0 = constant 0.0 : f32 %vf0 = splat %f0 : vector<4xf32> @@ -44,8 +53,12 @@ func @entry() { // Read shifted by 0 and pad with -42: // ( 0, 1, 2, 0, 0, -42, ..., -42) call @transfer_read_1d(%A, %c0) : (memref, index) -> () + // Read unmasked 4 @ 1, guaranteed to not overflow. + // Exercises proper alignment. 
+ call @transfer_read_unmasked_4(%A, %c1) : (memref, index) -> () return } // CHECK: ( 2, 3, 4, -42, -42, -42, -42, -42, -42, -42, -42, -42, -42 ) // CHECK: ( 0, 1, 2, 0, 0, -42, -42, -42, -42, -42, -42, -42, -42 ) +// CHECK: ( 1, 2, 0, 0 ) diff --git a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-write.mlir b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-write.mlir index 57163700fc998..c61a1629dcfb0 100644 --- a/mlir/integration_test/Dialect/Vector/CPU/test-transfer-write.mlir +++ b/mlir/integration_test/Dialect/Vector/CPU/test-transfer-write.mlir @@ -3,11 +3,11 @@ // RUN: -shared-libs=%mlir_integration_test_dir/libmlir_c_runner_utils%shlibext | \ // RUN: FileCheck %s -func @transfer_write16_1d(%A : memref, %base: index) { +func @transfer_write16_unmasked_1d(%A : memref, %base: index) { %f = constant 16.0 : f32 %v = splat %f : vector<16xf32> vector.transfer_write %v, %A[%base] - {permutation_map = affine_map<(d0) -> (d0)>} + {permutation_map = affine_map<(d0) -> (d0)>, masked = [false]} : vector<16xf32>, memref return } @@ -53,14 +53,14 @@ func @entry() { %0 = call @transfer_read_1d(%A) : (memref) -> (vector<32xf32>) vector.print %0 : vector<32xf32> - // Overwrite with 16 values of 16 at base 4. - %c4 = constant 4: index - call @transfer_write16_1d(%A, %c4) : (memref, index) -> () + // Overwrite with 16 values of 16 at base 3. + // Statically guaranteed to be unmasked. Exercises proper alignment. + %c3 = constant 3: index + call @transfer_write16_unmasked_1d(%A, %c3) : (memref, index) -> () %1 = call @transfer_read_1d(%A) : (memref) -> (vector<32xf32>) vector.print %1 : vector<32xf32> // Overwrite with 13 values of 13 at base 3. 
- %c3 = constant 3: index call @transfer_write13_1d(%A, %c3) : (memref, index) -> () %2 = call @transfer_read_1d(%A) : (memref) -> (vector<32xf32>) vector.print %2 : vector<32xf32> @@ -93,8 +93,8 @@ func @entry() { } // CHECK: ( 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) -// CHECK: ( 0, 0, 0, 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) -// CHECK: ( 0, 0, 0, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) +// CHECK: ( 0, 0, 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) +// CHECK: ( 0, 0, 0, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 16, 16, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) // CHECK: ( 0, 0, 0, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) // CHECK: ( 0, 0, 0, 17, 17, 17, 17, 17, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 ) // CHECK: ( 0, 0, 0, 17, 17, 17, 17, 17, 13, 13, 13, 13, 13, 13, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 17, 0 ) diff --git a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp index 2be2bd9bb7d02..a59f02681c54e 100644 --- a/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp +++ b/mlir/lib/Conversion/VectorToLLVM/ConvertVectorToLLVM.cpp @@ -143,7 +143,10 @@ replaceTransferOpWithLoadOrStore(ConversionPatternRewriter &rewriter, LLVMTypeConverter &typeConverter, Location loc, TransferReadOp xferOp, ArrayRef operands, Value dataPtr) { - rewriter.replaceOpWithNewOp(xferOp, dataPtr); + unsigned align; + if (failed(getVectorTransferAlignment(typeConverter, xferOp, align))) + return failure(); + rewriter.replaceOpWithNewOp(xferOp, dataPtr, align); return success(); } @@ -176,8 +179,12 @@ 
replaceTransferOpWithLoadOrStore(ConversionPatternRewriter &rewriter, LLVMTypeConverter &typeConverter, Location loc, TransferWriteOp xferOp, ArrayRef operands, Value dataPtr) { + unsigned align; + if (failed(getVectorTransferAlignment(typeConverter, xferOp, align))) + return failure(); auto adaptor = TransferWriteOpAdaptor(operands); - rewriter.replaceOpWithNewOp(xferOp, adaptor.vector(), dataPtr); + rewriter.replaceOpWithNewOp(xferOp, adaptor.vector(), dataPtr, + align); return success(); } diff --git a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir index 829edf5f66f17..874cb5cca1410 100644 --- a/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir +++ b/mlir/test/Conversion/VectorToLLVM/vector-to-llvm.mlir @@ -935,7 +935,7 @@ func @transfer_read_1d_not_masked(%A : memref, %base: index) -> vector<17 // CHECK-SAME: !llvm<"float*"> to !llvm<"<17 x float>*"> // // 2. Rewrite as a load. -// CHECK: %[[loaded:.*]] = llvm.load %[[vecPtr]] : !llvm<"<17 x float>*"> +// CHECK: %[[loaded:.*]] = llvm.load %[[vecPtr]] {alignment = 4 : i64} : !llvm<"<17 x float>*"> func @genbool_1d() -> vector<8xi1> { %0 = vector.constant_mask [4] : vector<8xi1> From 427bda4e9b370d8efa96a7dbf503cdcb45616802 Mon Sep 17 00:00:00 2001 From: Quentin Colombet Date: Mon, 13 Jul 2020 14:37:21 -0700 Subject: [PATCH 157/771] [MC/AsmParser] layout-interdependency.s depends on having a proper triple Fix the requirements for that test. 
NFC
---
 llvm/test/MC/AsmParser/layout-interdependency.s | 1 +
 1 file changed, 1 insertion(+)

diff --git a/llvm/test/MC/AsmParser/layout-interdependency.s b/llvm/test/MC/AsmParser/layout-interdependency.s
index 6e275e00d9ec7..6310610a718c1 100644
--- a/llvm/test/MC/AsmParser/layout-interdependency.s
+++ b/llvm/test/MC/AsmParser/layout-interdependency.s
@@ -1,4 +1,5 @@
 # RUN: not llvm-mc --filetype=obj %s -o /dev/null 2>&1 | FileCheck %s
+# REQUIRES: default_triple

 fct_end:

From e51c7fb842ee61e8002634ad2e8548d748cb6172 Mon Sep 17 00:00:00 2001
From: Vedant Kumar
Date: Mon, 13 Jul 2020 14:38:31 -0700
Subject: [PATCH 158/771] [debugify] Add targeted test for 2fa656c, NFC

https://reviews.llvm.org/D78411 introduced test changes which relied on the
ability to strip debugify metadata even if module-level metadata is missing.
This introduces a more targeted test for that ability.

---
 .../CodeGen/Generic/MIRStripDebug/no-metadata-present.mir | 7 +++++++
 1 file changed, 7 insertions(+)
 create mode 100644 llvm/test/CodeGen/Generic/MIRStripDebug/no-metadata-present.mir

diff --git a/llvm/test/CodeGen/Generic/MIRStripDebug/no-metadata-present.mir b/llvm/test/CodeGen/Generic/MIRStripDebug/no-metadata-present.mir
new file mode 100644
index 0000000000000..8952003384931
--- /dev/null
+++ b/llvm/test/CodeGen/Generic/MIRStripDebug/no-metadata-present.mir
@@ -0,0 +1,7 @@
+# RUN: llc -run-pass=mir-strip-debug -mir-strip-debugify-only=0 -o /dev/null %s
+
+---
+name: test
+body: |
+  bb.1:
+...

From 724afa5a331372ff1684f2bffa6976887490cbaf Mon Sep 17 00:00:00 2001
From: Benjamin Kramer
Date: Mon, 13 Jul 2020 23:50:00 +0200
Subject: [PATCH 159/771] [analyzer] Inline StringSet that's defined in a
 header

That's just asking for ODR violations. Also drop a call to lower() that's
not needed.
--- clang/lib/StaticAnalyzer/Checkers/SmartPtr.h | 7 ------- clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp | 3 ++- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h b/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h index 89b8965e4c9ad..ec43a23e30a9e 100644 --- a/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h +++ b/clang/lib/StaticAnalyzer/Checkers/SmartPtr.h @@ -20,13 +20,6 @@ namespace clang { namespace ento { namespace smartptr { -/// Set of STL smart pointer class which we are trying to model. -const llvm::StringSet<> StdSmartPtrs = { - "shared_ptr", - "unique_ptr", - "weak_ptr", -}; - /// Returns true if the event call is on smart pointer. bool isStdSmartPtrCall(const CallEvent &Call); diff --git a/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp index 91f2890788141..bcc7d4103c1c6 100644 --- a/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/SmartPtrModeling.cpp @@ -73,7 +73,8 @@ bool isStdSmartPtrCall(const CallEvent &Call) { return false; if (RecordDecl->getDeclName().isIdentifier()) { - return smartptr::StdSmartPtrs.count(RecordDecl->getName().lower()); + StringRef Name = RecordDecl->getName(); + return Name == "shared_ptr" || Name == "unique_ptr" || Name == "weak_ptr"; } return false; } From 8c4a65b9b2ca6961139beca92de37eea479f00fa Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Fri, 13 Dec 2019 12:59:40 -0800 Subject: [PATCH 160/771] [ubsan] Check implicit casts in ObjC for-in statements Check that the implicit cast from `id` used to construct the element variable in an ObjC for-in statement is valid. This check is included as part of a new `objc-cast` sanitizer, outside of the main 'undefined' group, as (IIUC) the behavior it's checking for is not technically UB. The check can be extended to cover other kinds of invalid casts in ObjC. 
Partially addresses: rdar://12903059, rdar://9542496 Differential Revision: https://reviews.llvm.org/D71491 --- clang/docs/UndefinedBehaviorSanitizer.rst | 4 ++ clang/include/clang/Basic/Sanitizers.def | 2 + clang/lib/CodeGen/CGObjC.cpp | 34 +++++++++++++ clang/lib/CodeGen/CodeGenFunction.h | 1 + clang/lib/Driver/SanitizerArgs.cpp | 10 ++-- clang/lib/Driver/ToolChains/Darwin.cpp | 1 + clang/test/CodeGenObjC/for-in.m | 17 ++++++- compiler-rt/lib/ubsan/ubsan_checks.inc | 1 + compiler-rt/lib/ubsan/ubsan_handlers.cpp | 31 ++++++++++++ compiler-rt/lib/ubsan/ubsan_handlers.h | 8 ++++ compiler-rt/lib/ubsan/ubsan_value.cpp | 48 +++++++++++++++++++ compiler-rt/lib/ubsan/ubsan_value.h | 3 ++ .../ubsan_minimal/ubsan_minimal_handlers.cpp | 1 + .../test/ubsan/TestCases/Misc/objc-cast.m | 27 +++++++++++ 14 files changed, 183 insertions(+), 5 deletions(-) create mode 100644 compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst index 0a27810150db9..76676dfce95b4 100644 --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -127,6 +127,10 @@ Available checks are: is annotated with ``_Nonnull``. - ``-fsanitize=nullability-return``: Returning null from a function with a return type annotated with ``_Nonnull``. + - ``-fsanitize=objc-cast``: Invalid implicit cast of an ObjC object pointer + to an incompatible type. This is often unintentional, but is not undefined + behavior, therefore the check is not a part of the ``undefined`` group. + Currently only supported on Darwin. - ``-fsanitize=object-size``: An attempt to potentially use bytes which the optimizer can determine are not part of the object being accessed. 
This will also detect some types of undefined behavior that may not diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def index 0037cc2146f26..2912bdd44b2db 100644 --- a/clang/include/clang/Basic/Sanitizers.def +++ b/clang/include/clang/Basic/Sanitizers.def @@ -156,6 +156,8 @@ SANITIZER_GROUP("implicit-integer-arithmetic-value-change", ImplicitIntegerArithmeticValueChange, ImplicitIntegerSignChange | ImplicitSignedIntegerTruncation) +SANITIZER("objc-cast", ObjCCast) + // FIXME: //SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion, // ImplicitIntegerArithmeticValueChange | diff --git a/clang/lib/CodeGen/CGObjC.cpp b/clang/lib/CodeGen/CGObjC.cpp index 158a548e66c1e..cd2b84f5dd203 100644 --- a/clang/lib/CodeGen/CGObjC.cpp +++ b/clang/lib/CodeGen/CGObjC.cpp @@ -1836,6 +1836,40 @@ void CodeGenFunction::EmitObjCForCollectionStmt(const ObjCForCollectionStmt &S){ llvm::Value *CurrentItem = Builder.CreateAlignedLoad(CurrentItemPtr, getPointerAlign()); + if (SanOpts.has(SanitizerKind::ObjCCast)) { + // Before using an item from the collection, check that the implicit cast + // from id to the element type is valid. This is done with instrumentation + // roughly corresponding to: + // + // if (![item isKindOfClass:expectedCls]) { /* emit diagnostic */ } + const ObjCObjectPointerType *ObjPtrTy = + elementType->getAsObjCInterfacePointerType(); + const ObjCInterfaceType *InterfaceTy = + ObjPtrTy ? 
ObjPtrTy->getInterfaceType() : nullptr; + if (InterfaceTy) { + SanitizerScope SanScope(this); + auto &C = CGM.getContext(); + assert(InterfaceTy->getDecl() && "No decl for ObjC interface type"); + Selector IsKindOfClassSel = GetUnarySelector("isKindOfClass", C); + CallArgList IsKindOfClassArgs; + llvm::Value *Cls = + CGM.getObjCRuntime().GetClass(*this, InterfaceTy->getDecl()); + IsKindOfClassArgs.add(RValue::get(Cls), C.getObjCClassType()); + llvm::Value *IsClass = + CGM.getObjCRuntime() + .GenerateMessageSend(*this, ReturnValueSlot(), C.BoolTy, + IsKindOfClassSel, CurrentItem, + IsKindOfClassArgs) + .getScalarVal(); + llvm::Constant *StaticData[] = { + EmitCheckSourceLocation(S.getBeginLoc()), + EmitCheckTypeDescriptor(QualType(InterfaceTy, 0))}; + EmitCheck({{IsClass, SanitizerKind::ObjCCast}}, + SanitizerHandler::InvalidObjCCast, + ArrayRef(StaticData), CurrentItem); + } + } + // Cast that value to the right type. CurrentItem = Builder.CreateBitCast(CurrentItem, convertedElementType, "currentitem"); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 1fc2ed76ca9e6..d794f4f0fa815 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -124,6 +124,7 @@ enum TypeEvaluationKind { SANITIZER_CHECK(FunctionTypeMismatch, function_type_mismatch, 1) \ SANITIZER_CHECK(ImplicitConversion, implicit_conversion, 0) \ SANITIZER_CHECK(InvalidBuiltin, invalid_builtin, 0) \ + SANITIZER_CHECK(InvalidObjCCast, invalid_objc_cast, 0) \ SANITIZER_CHECK(LoadInvalidValue, load_invalid_value, 0) \ SANITIZER_CHECK(MissingReturn, missing_return, 0) \ SANITIZER_CHECK(MulOverflow, mul_overflow, 0) \ diff --git a/clang/lib/Driver/SanitizerArgs.cpp b/clang/lib/Driver/SanitizerArgs.cpp index 0b81152d57f6b..bcc9ffc7ff8f6 100644 --- a/clang/lib/Driver/SanitizerArgs.cpp +++ b/clang/lib/Driver/SanitizerArgs.cpp @@ -27,7 +27,8 @@ using namespace llvm::opt; static const SanitizerMask NeedsUbsanRt = SanitizerKind::Undefined 
| SanitizerKind::Integer | SanitizerKind::ImplicitConversion | SanitizerKind::Nullability | - SanitizerKind::CFI | SanitizerKind::FloatDivideByZero; + SanitizerKind::CFI | SanitizerKind::FloatDivideByZero | + SanitizerKind::ObjCCast; static const SanitizerMask NeedsUbsanCxxRt = SanitizerKind::Vptr | SanitizerKind::CFI; static const SanitizerMask NotAllowedWithTrap = SanitizerKind::Vptr; @@ -48,11 +49,11 @@ static const SanitizerMask SupportsCoverage = SanitizerKind::DataFlow | SanitizerKind::Fuzzer | SanitizerKind::FuzzerNoLink | SanitizerKind::FloatDivideByZero | SanitizerKind::SafeStack | SanitizerKind::ShadowCallStack | - SanitizerKind::Thread; + SanitizerKind::Thread | SanitizerKind::ObjCCast; static const SanitizerMask RecoverableByDefault = SanitizerKind::Undefined | SanitizerKind::Integer | SanitizerKind::ImplicitConversion | SanitizerKind::Nullability | - SanitizerKind::FloatDivideByZero; + SanitizerKind::FloatDivideByZero | SanitizerKind::ObjCCast; static const SanitizerMask Unrecoverable = SanitizerKind::Unreachable | SanitizerKind::Return; static const SanitizerMask AlwaysRecoverable = @@ -62,7 +63,8 @@ static const SanitizerMask TrappingSupported = (SanitizerKind::Undefined & ~SanitizerKind::Vptr) | SanitizerKind::UnsignedIntegerOverflow | SanitizerKind::ImplicitConversion | SanitizerKind::Nullability | SanitizerKind::LocalBounds | - SanitizerKind::CFI | SanitizerKind::FloatDivideByZero; + SanitizerKind::CFI | SanitizerKind::FloatDivideByZero | + SanitizerKind::ObjCCast; static const SanitizerMask TrappingDefault = SanitizerKind::CFI; static const SanitizerMask CFIClasses = SanitizerKind::CFIVCall | SanitizerKind::CFINVCall | diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 2e1190c34ea7a..7b879f8cb6521 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -2721,6 +2721,7 @@ SanitizerMask Darwin::getSupportedSanitizers() const { Res |= SanitizerKind::Fuzzer; 
Res |= SanitizerKind::FuzzerNoLink; Res |= SanitizerKind::Function; + Res |= SanitizerKind::ObjCCast; // Prior to 10.9, macOS shipped a version of the C++ standard library without // C++11 support. The same is true of iOS prior to version 5. These OS'es are diff --git a/clang/test/CodeGenObjC/for-in.m b/clang/test/CodeGenObjC/for-in.m index 26fe7922aee9f..20e89b33affaf 100644 --- a/clang/test/CodeGenObjC/for-in.m +++ b/clang/test/CodeGenObjC/for-in.m @@ -1,4 +1,5 @@ -// RUN: %clang_cc1 -emit-llvm %s -o %t +// RUN: %clang_cc1 %s -verify -o /dev/null +// RUN: %clang_cc1 %s -triple x86_64-apple-darwin -emit-llvm -fsanitize=objc-cast -o - | FileCheck %s void p(const char*, ...); @@ -18,12 +19,26 @@ -(const char*) cString; #define L5(n) L4(n+0),L4(n+16) #define L6(n) L5(n+0),L5(n+32) +// CHECK-LABEL: define void @t0 void t0() { NSArray *array = [NSArray arrayWithObjects: L1(0), (void*)0]; p("array.length: %d\n", [array count]); unsigned index = 0; for (NSString *i in array) { // expected-warning {{collection expression type 'NSArray *' may not respond}} + + // CHECK: [[expectedCls:%.*]] = load %struct._class_t*, {{.*}}, !nosanitize + // CHECK-NEXT: [[kindOfClassSel:%.*]] = load i8*, i8** @OBJC_SELECTOR_REFERENCES{{.*}}, !nosanitize + // CHECK-NEXT: [[expectedClsI8:%.*]] = bitcast %struct._class_t* [[expectedCls]] to i8*, !nosanitize + // CHECK-NEXT: [[isCls:%.*]] = call zeroext i1 bitcast {{.*}}@objc_msgSend to i1 (i8*, i8*, {{.*}})(i8* [[theItem:%.*]], i8* [[kindOfClassSel]], i8* [[expectedClsI8]]), !nosanitize + // CHECK: br i1 [[isCls]] + + // CHECK: ptrtoint i8* [[theItem]] to i64, !nosanitize + // CHECK-NEXT: call void @__ubsan_handle_invalid_objc_cast + // CHECK-NEXT: unreachable, !nosanitize + + // CHECK: bitcast i8* [[theItem]] + p("element %d: %s\n", index++, [i cString]); } } diff --git a/compiler-rt/lib/ubsan/ubsan_checks.inc b/compiler-rt/lib/ubsan/ubsan_checks.inc index 2c1529a7d92c5..846cd89ee19f8 100644 --- a/compiler-rt/lib/ubsan/ubsan_checks.inc +++ 
b/compiler-rt/lib/ubsan/ubsan_checks.inc @@ -37,6 +37,7 @@ UBSAN_CHECK(IntegerDivideByZero, "integer-divide-by-zero", "integer-divide-by-zero") UBSAN_CHECK(FloatDivideByZero, "float-divide-by-zero", "float-divide-by-zero") UBSAN_CHECK(InvalidBuiltin, "invalid-builtin-use", "invalid-builtin-use") +UBSAN_CHECK(InvalidObjCCast, "invalid-objc-cast", "invalid-objc-cast") UBSAN_CHECK(ImplicitUnsignedIntegerTruncation, "implicit-unsigned-integer-truncation", "implicit-unsigned-integer-truncation") diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp index 7f6a46fb6cf08..e201e6bba2207 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp +++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp @@ -16,6 +16,7 @@ #include "ubsan_diag.h" #include "ubsan_flags.h" #include "ubsan_monitor.h" +#include "ubsan_value.h" #include "sanitizer_common/sanitizer_common.h" @@ -640,6 +641,36 @@ void __ubsan::__ubsan_handle_invalid_builtin_abort(InvalidBuiltinData *Data) { Die(); } +static void handleInvalidObjCCast(InvalidObjCCast *Data, ValueHandle Pointer, + ReportOptions Opts) { + SourceLocation Loc = Data->Loc.acquire(); + ErrorType ET = ErrorType::InvalidObjCCast; + + if (ignoreReport(Loc, Opts, ET)) + return; + + ScopedReport R(Opts, Loc, ET); + + const char *GivenClass = getObjCClassName(Pointer); + const char *GivenClassStr = GivenClass ? 
GivenClass : ""; + + Diag(Loc, DL_Error, ET, + "invalid ObjC cast, object is a '%0', but expected a %1") + << GivenClassStr << Data->ExpectedType; +} + +void __ubsan::__ubsan_handle_invalid_objc_cast(InvalidObjCCast *Data, + ValueHandle Pointer) { + GET_REPORT_OPTIONS(false); + handleInvalidObjCCast(Data, Pointer, Opts); +} +void __ubsan::__ubsan_handle_invalid_objc_cast_abort(InvalidObjCCast *Data, + ValueHandle Pointer) { + GET_REPORT_OPTIONS(true); + handleInvalidObjCCast(Data, Pointer, Opts); + Die(); +} + static void handleNonNullReturn(NonNullReturnData *Data, SourceLocation *LocPtr, ReportOptions Opts, bool IsAttr) { if (!LocPtr) diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.h b/compiler-rt/lib/ubsan/ubsan_handlers.h index 22ca96422381c..219fb15de55fe 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.h +++ b/compiler-rt/lib/ubsan/ubsan_handlers.h @@ -168,6 +168,14 @@ struct InvalidBuiltinData { /// Handle a builtin called in an invalid way. RECOVERABLE(invalid_builtin, InvalidBuiltinData *Data) +struct InvalidObjCCast { + SourceLocation Loc; + const TypeDescriptor &ExpectedType; +}; + +/// Handle an invalid ObjC cast. +RECOVERABLE(invalid_objc_cast, InvalidObjCCast *Data, ValueHandle Pointer) + struct NonNullReturnData { SourceLocation AttrLoc; }; diff --git a/compiler-rt/lib/ubsan/ubsan_value.cpp b/compiler-rt/lib/ubsan/ubsan_value.cpp index 60f0b5c993482..79c3ba991d398 100644 --- a/compiler-rt/lib/ubsan/ubsan_value.cpp +++ b/compiler-rt/lib/ubsan/ubsan_value.cpp @@ -16,9 +16,57 @@ #include "ubsan_value.h" #include "sanitizer_common/sanitizer_common.h" #include "sanitizer_common/sanitizer_libc.h" +#include "sanitizer_common/sanitizer_mutex.h" + +// TODO(dliew): Prefer '__APPLE__' here over 'SANITIZER_MAC', as the latter is +// unclear. rdar://58124919 tracks using a more obviously portable guard. 
+#if defined(__APPLE__)
+#include <dlfcn.h>
+#endif

 using namespace __ubsan;

+typedef const char *(*ObjCGetClassNameTy)(void *);
+
+const char *__ubsan::getObjCClassName(ValueHandle Pointer) {
+#if defined(__APPLE__)
+  // We need to query the ObjC runtime for some information, but do not want
+  // to introduce a static dependency from the ubsan runtime onto ObjC. Try to
+  // grab a handle to the ObjC runtime used by the process.
+  static bool AttemptedDlopen = false;
+  static void *ObjCHandle = nullptr;
+  static void *ObjCObjectGetClassName = nullptr;
+
+  // Prevent threads from racing to dlopen().
+  static __sanitizer::StaticSpinMutex Lock;
+  {
+    __sanitizer::SpinMutexLock Guard(&Lock);
+
+    if (!AttemptedDlopen) {
+      ObjCHandle = dlopen(
+          "/usr/lib/libobjc.A.dylib",
+          RTLD_LAZY         // Only bind symbols when used.
+              | RTLD_LOCAL  // Only make symbols available via the handle.
+              | RTLD_NOLOAD // Do not load the dylib, just grab a handle if the
+                            // image is already loaded.
+              | RTLD_FIRST  // Only search the image pointed-to by the handle.
+      );
+      AttemptedDlopen = true;
+      if (!ObjCHandle)
+        return nullptr;
+      ObjCObjectGetClassName = dlsym(ObjCHandle, "object_getClassName");
+    }
+  }
+
+  if (!ObjCObjectGetClassName)
+    return nullptr;
+
+  return ObjCGetClassNameTy(ObjCObjectGetClassName)((void *)Pointer);
+#else
+  return nullptr;
+#endif
+}
+
 SIntMax Value::getSIntValue() const {
   CHECK(getType().isSignedIntegerTy());
   if (isInlineInt()) {
diff --git a/compiler-rt/lib/ubsan/ubsan_value.h b/compiler-rt/lib/ubsan/ubsan_value.h
index a216e3a147e91..e0957276dd241 100644
--- a/compiler-rt/lib/ubsan/ubsan_value.h
+++ b/compiler-rt/lib/ubsan/ubsan_value.h
@@ -135,6 +135,9 @@ class TypeDescriptor {
 /// \brief An opaque handle to a value.
 typedef uptr ValueHandle;

+/// Returns the class name of the given ObjC object, or null if the name
+/// cannot be found.
+const char *getObjCClassName(ValueHandle Pointer);

 /// \brief Representation of an operand value provided by the instrumented code.
/// diff --git a/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp b/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp index ed62ddd0fa348..8654c705cfbb0 100644 --- a/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp +++ b/compiler-rt/lib/ubsan_minimal/ubsan_minimal_handlers.cpp @@ -109,6 +109,7 @@ HANDLER(vla_bound_not_positive, "vla-bound-not-positive") HANDLER(float_cast_overflow, "float-cast-overflow") HANDLER(load_invalid_value, "load-invalid-value") HANDLER(invalid_builtin, "invalid-builtin") +HANDLER(invalid_objc_cast, "invalid-objc-cast") HANDLER(function_type_mismatch, "function-type-mismatch") HANDLER(implicit_conversion, "implicit-conversion") HANDLER(nonnull_arg, "nonnull-arg") diff --git a/compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m b/compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m new file mode 100644 index 0000000000000..f502e5f535372 --- /dev/null +++ b/compiler-rt/test/ubsan/TestCases/Misc/objc-cast.m @@ -0,0 +1,27 @@ +// REQUIRES: darwin +// +// RUN: %clang -framework Foundation -fsanitize=objc-cast %s -O1 -o %t +// RUN: %run %t 2>&1 | FileCheck %s +// +// RUN: %clang -framework Foundation -fsanitize=objc-cast -fno-sanitize-recover=objc-cast %s -O1 -o %t.trap +// RUN: not %run %t.trap 2>&1 | FileCheck %s + +#include + +int main() { + NSArray *arrayOfInt = [NSArray arrayWithObjects:@1, @2, @3, (void *)0]; + // CHECK: objc-cast.m:[[@LINE+1]]:{{.*}}: runtime error: invalid ObjC cast, object is a '__NSCFNumber', but expected a 'NSString' + for (NSString *str in arrayOfInt) { + NSLog(@"%@", str); + } + + NSArray *arrayOfStr = [NSArray arrayWithObjects:@"a", @"b", @"c", (void *)0]; + for (NSString *str in arrayOfStr) { + NSLog(@"%@", str); + } + + // The diagnostic should only be printed once. 
+ // CHECK-NOT: runtime error + + return 0; +} From 42170b3b4e1f7d30b377a3da07c354feae9b852e Mon Sep 17 00:00:00 2001 From: Gui Andrade Date: Mon, 29 Jun 2020 23:56:01 +0000 Subject: [PATCH 161/771] [Sanitizers] Implement getcpuclockid interceptor Differential Revision: https://reviews.llvm.org/D83000 --- .../sanitizer_common_interceptors.inc | 18 +++++++++++++++++ .../sanitizer_platform_interceptors.h | 1 + .../TestCases/Linux/getcpuclockid.c | 20 +++++++++++++++++++ 3 files changed, 39 insertions(+) create mode 100644 compiler-rt/test/sanitizer_common/TestCases/Linux/getcpuclockid.c diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index ea9c71ba88032..4b02ad2670fef 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -2199,6 +2199,23 @@ INTERCEPTOR(int, clock_settime, u32 clk_id, const void *tp) { #define INIT_CLOCK_GETTIME #endif +#if SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID +INTERCEPTOR(int, clock_getcpuclockid, pid_t pid, __sanitizer_clockid_t *clockid) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, clock_getcpuclockid, pid, clockid); + int res = REAL(clock_getcpuclockid)(pid, clockid); + if (!res && clockid) { + COMMON_INTERCEPTOR_WRITE_RANGE(ctx, clockid, sizeof *clockid); + } + return res; +} + +#define INIT_CLOCK_GETCPUCLOCKID \ + COMMON_INTERCEPT_FUNCTION(clock_getcpuclockid); +#else +#define INIT_CLOCK_GETCPUCLOCKID +#endif + #if SANITIZER_INTERCEPT_GETITIMER INTERCEPTOR(int, getitimer, int which, void *curr_value) { void *ctx; @@ -9914,6 +9931,7 @@ static void InitializeCommonInterceptors() { INIT_FGETGRENT_R; INIT_SETPWENT; INIT_CLOCK_GETTIME; + INIT_CLOCK_GETCPUCLOCKID; INIT_GETITIMER; INIT_TIME; INIT_GLOB; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h 
index 2d48e9d0ae1ad..e28bb937ae83d 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -240,6 +240,7 @@ (SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS) #define SANITIZER_INTERCEPT_CLOCK_GETTIME \ (SI_FREEBSD || SI_NETBSD || SI_OPENBSD || SI_LINUX || SI_SOLARIS) +#define SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID SI_LINUX #define SANITIZER_INTERCEPT_GETITIMER SI_POSIX #define SANITIZER_INTERCEPT_TIME SI_POSIX #define SANITIZER_INTERCEPT_GLOB SI_LINUX_NOT_ANDROID || SI_SOLARIS diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/getcpuclockid.c b/compiler-rt/test/sanitizer_common/TestCases/Linux/getcpuclockid.c new file mode 100644 index 0000000000000..6999a80b638e5 --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/getcpuclockid.c @@ -0,0 +1,20 @@ +// RUN: %clang %s -Wl,-as-needed -o %t && %run %t +#include +#include +#include + +long cpu_ns() { + clockid_t clk; + struct timespec ts; + int res = clock_getcpuclockid(getpid(), &clk); + assert(!res); + res = clock_gettime(clk, &ts); + assert(!res); + return ts.tv_nsec; +} + +int main() { + long cpuns = cpu_ns(); + asm volatile ("" :: "r"(cpuns)); + return 0; +} From bf0d060fd4ea1221fdac154e3404e448709e31aa Mon Sep 17 00:00:00 2001 From: Jinsong Ji Date: Mon, 13 Jul 2020 21:19:18 +0000 Subject: [PATCH 162/771] [compiler-rt][MSAN][test] Update buffersize for UTF-8 to C convert This is exposed by https://reviews.llvm.org/D83486. When the host is UTF8, we may get n >10, causing assert failure. Increase the buffersize to support UTF-8 to C conversion. 
Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D83719 --- compiler-rt/test/msan/strxfrm.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/compiler-rt/test/msan/strxfrm.cpp b/compiler-rt/test/msan/strxfrm.cpp index 94b8c70240907..d40b56b234cd3 100644 --- a/compiler-rt/test/msan/strxfrm.cpp +++ b/compiler-rt/test/msan/strxfrm.cpp @@ -7,7 +7,7 @@ #include int main(void) { - char q[10]; + char q[30]; size_t n = strxfrm(q, "abcdef", sizeof(q)); assert(n < sizeof(q)); __msan_check_mem_is_initialized(q, n + 1); From 528a1c56d976be83388782357b3b98711f7dafe0 Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Mon, 13 Jul 2020 15:15:34 -0700 Subject: [PATCH 163/771] Check output in test/CodeGen/Generic/MIRStripDebug/no-metadata-present.mir, NFC --- .../Generic/MIRStripDebug/no-metadata-present.mir | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/llvm/test/CodeGen/Generic/MIRStripDebug/no-metadata-present.mir b/llvm/test/CodeGen/Generic/MIRStripDebug/no-metadata-present.mir index 8952003384931..3df834845b404 100644 --- a/llvm/test/CodeGen/Generic/MIRStripDebug/no-metadata-present.mir +++ b/llvm/test/CodeGen/Generic/MIRStripDebug/no-metadata-present.mir @@ -1,7 +1,13 @@ -# RUN: llc -run-pass=mir-strip-debug -mir-strip-debugify-only=0 -o /dev/null %s +# RUN: llc -run-pass=mir-strip-debug -mir-strip-debugify-only=0 -o - %s | FileCheck %s + +# CHECK: name: test +# CHECK: body: | +# CHECK-NEXT: bb.0: +# CHECK-EMPTY: +# CHECK-NEXT: ... --- name: test body: | - bb.1: + bb.0: ... From 3d52b1e81b7b3891d9132d826d4889119fad7d00 Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Mon, 13 Jul 2020 15:17:03 -0700 Subject: [PATCH 164/771] Revert "[InstCombine] Drop debug loc in TryToSinkInstruction (reland)" This reverts commit 9649c2095f07a392bc2b2a93b5bd6c4c9bf5ba34. See discussion on the llvm-commits thread: if it's OK to preserve the location when sinking a call, it's probably OK to always preserve the location. 
--- .../InstCombine/InstructionCombining.cpp | 6 --- .../InstCombine/sink_to_unreachable_dbg.ll | 46 ------------------- 2 files changed, 52 deletions(-) delete mode 100644 llvm/test/Transforms/InstCombine/sink_to_unreachable_dbg.ll diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index d1c1e54188251..ec934906355d6 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -3356,12 +3356,6 @@ static bool TryToSinkInstruction(Instruction *I, BasicBlock *DestBlock) { I->moveBefore(&*InsertPos); ++NumSunkInst; - // Drop the debug loc of non-inlinable instructions. This prevents - // single-stepping from going backwards. See HowToUpdateDebugInfo.rst for - // the full rationale. - if (!isa(I)) - I->setDebugLoc(DebugLoc()); - // Also sink all related debug uses from the source basic block. Otherwise we // get debug use before the def. Attempt to salvage debug uses first, to // maximise the range variables have location for. 
If we cannot salvage, then diff --git a/llvm/test/Transforms/InstCombine/sink_to_unreachable_dbg.ll b/llvm/test/Transforms/InstCombine/sink_to_unreachable_dbg.ll deleted file mode 100644 index e642276224b84..0000000000000 --- a/llvm/test/Transforms/InstCombine/sink_to_unreachable_dbg.ll +++ /dev/null @@ -1,46 +0,0 @@ -; RUN: opt -debugify -debugify-level=locations -instcombine -S < %s | FileCheck %s - -; CHECK-LABEL: @test1( -; CHECK: [[phi:%.*]] = phi i32 -; CHECK-NEXT: [[add:%.*]] = add i32 {{.*}}, 1{{$}} -; CHECK-NEXT: add i32 [[phi]], [[add]], !dbg -define i32 @test1(i32 %0, i1 %1) { - %3 = add i32 %0, 1 - br i1 %1, label %4, label %5 - -4: ; preds = %2 - br label %6 - -5: ; preds = %2 - br label %6 - -6: ; preds = %5, %4 - %7 = phi i32 [ 0, %4 ], [ 1, %5 ] - %8 = add i32 %7, %3 - ret i32 %8 -} - -; Function Attrs: nounwind readnone -declare i32 @external(i32) #0 - -; CHECK-LABEL: @test2( -; CHECK: [[phi:%.*]] = phi i32 -; CHECK-NEXT: [[add:%.*]] = call i32 @external(i32 {{.*}}), !dbg -; CHECK-NEXT: add i32 [[phi]], [[add]], !dbg -define i32 @test2(i32 %0, i1 %1) { - %3 = call i32 @external(i32 %0) - br i1 %1, label %4, label %5 - -4: ; preds = %2 - br label %6 - -5: ; preds = %2 - br label %6 - -6: ; preds = %5, %4 - %7 = phi i32 [ 0, %4 ], [ 1, %5 ] - %8 = add i32 %7, %3 - ret i32 %8 -} - -attributes #0 = { nounwind readnone } From bfa3b627c6832552a7808a9f0f7f9cab61c7ea1a Mon Sep 17 00:00:00 2001 From: Gui Andrade Date: Tue, 7 Jul 2020 21:21:13 +0000 Subject: [PATCH 165/771] [InstCombine] Erase attribute lists for simplified libcalls Currently, a transformation like pow(2.0, x) -> exp2(x) copies the pow attribute list verbatim and applies it to exp2. This works out fine when the attribute list is empty, but when it isn't clang may error due due to the mismatch. The source function and destination don't necessarily have anything to do with one another, attribute-wise. 
So it makes sense to remove the attribute lists (this is similar to what IPO does in this situation). This was discovered after implementing the `noundef` param attribute. Differential Revision: https://reviews.llvm.org/D82820 --- .../lib/Transforms/Utils/SimplifyLibCalls.cpp | 9 ++++---- .../test/Transforms/InstCombine/pow_fp_int.ll | 6 ++--- .../InstCombine/simplify-libcalls.ll | 22 +++++++++++++++++++ 3 files changed, 30 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp index 6ad8bc6e09426..cfcc3454a2102 100644 --- a/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1448,7 +1448,7 @@ static Value *getIntToFPVal(Value *I2F, IRBuilderBase &B) { /// exp10(x) for pow(10.0, x); exp2(log2(n) * x) for pow(n, x). Value *LibCallSimplifier::replacePowWithExp(CallInst *Pow, IRBuilderBase &B) { Value *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); - AttributeList Attrs = Pow->getCalledFunction()->getAttributes(); + AttributeList Attrs; // Attributes are only meaningful on the original call Module *Mod = Pow->getModule(); Type *Ty = Pow->getType(); bool Ignored; @@ -1615,7 +1615,7 @@ static Value *getSqrtCall(Value *V, AttributeList Attrs, bool NoErrno, /// Use square root in place of pow(x, +/-0.5). 
Value *LibCallSimplifier::replacePowWithSqrt(CallInst *Pow, IRBuilderBase &B) { Value *Sqrt, *Base = Pow->getArgOperand(0), *Expo = Pow->getArgOperand(1); - AttributeList Attrs = Pow->getCalledFunction()->getAttributes(); + AttributeList Attrs; // Attributes are only meaningful on the original call Module *Mod = Pow->getModule(); Type *Ty = Pow->getType(); @@ -1785,6 +1785,7 @@ Value *LibCallSimplifier::optimizePow(CallInst *Pow, IRBuilderBase &B) { Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { Function *Callee = CI->getCalledFunction(); + AttributeList Attrs; // Attributes are only meaningful on the original call StringRef Name = Callee->getName(); Value *Ret = nullptr; if (UnsafeFPShrink && Name == TLI->getName(LibFunc_exp2) && @@ -1801,7 +1802,7 @@ Value *LibCallSimplifier::optimizeExp2(CallInst *CI, IRBuilderBase &B) { if (Value *Exp = getIntToFPVal(Op, B)) return emitBinaryFloatFnCall(ConstantFP::get(Ty, 1.0), Exp, TLI, LibFunc_ldexp, LibFunc_ldexpf, LibFunc_ldexpl, - B, CI->getCalledFunction()->getAttributes()); + B, Attrs); } return Ret; @@ -1836,7 +1837,7 @@ Value *LibCallSimplifier::optimizeFMinFMax(CallInst *CI, IRBuilderBase &B) { Value *LibCallSimplifier::optimizeLog(CallInst *Log, IRBuilderBase &B) { Function *LogFn = Log->getCalledFunction(); - AttributeList Attrs = LogFn->getAttributes(); + AttributeList Attrs; // Attributes are only meaningful on the original call StringRef LogNm = LogFn->getName(); Intrinsic::ID LogID = LogFn->getIntrinsicID(); Module *Mod = Log->getModule(); diff --git a/llvm/test/Transforms/InstCombine/pow_fp_int.ll b/llvm/test/Transforms/InstCombine/pow_fp_int.ll index b4117d4ca0e0e..48297589ad91e 100644 --- a/llvm/test/Transforms/InstCombine/pow_fp_int.ll +++ b/llvm/test/Transforms/InstCombine/pow_fp_int.ll @@ -51,7 +51,7 @@ define double @pow_uitofp_double_const_base_fast(i31 %x) { define double @pow_sitofp_double_const_base_2_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_double_const_base_2_fast( -; 
CHECK-NEXT: [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i32 [[X:%.*]]) #1 +; CHECK-NEXT: [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i32 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -78,7 +78,7 @@ define double @pow_sitofp_double_const_base_power_of_2_fast(i32 %x) { define double @pow_uitofp_const_base_2_fast(i31 %x) { ; CHECK-LABEL: @pow_uitofp_const_base_2_fast( ; CHECK-NEXT: [[TMP1:%.*]] = zext i31 [[X:%.*]] to i32 -; CHECK-NEXT: [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i32 [[TMP1]]) #1 +; CHECK-NEXT: [[LDEXPF:%.*]] = call afn float @ldexpf(float 1.000000e+00, i32 [[TMP1]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; @@ -343,7 +343,7 @@ define double @pow_uitofp_const_base_no_fast(i32 %x) { define double @pow_sitofp_const_base_2_no_fast(i32 %x) { ; CHECK-LABEL: @pow_sitofp_const_base_2_no_fast( -; CHECK-NEXT: [[LDEXPF:%.*]] = call float @ldexpf(float 1.000000e+00, i32 [[X:%.*]]) #1 +; CHECK-NEXT: [[LDEXPF:%.*]] = call float @ldexpf(float 1.000000e+00, i32 [[X:%.*]]) ; CHECK-NEXT: [[RES:%.*]] = fpext float [[LDEXPF]] to double ; CHECK-NEXT: ret double [[RES]] ; diff --git a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll index d31c3b17ac62c..d212bcd8ff767 100644 --- a/llvm/test/Transforms/InstCombine/simplify-libcalls.ll +++ b/llvm/test/Transforms/InstCombine/simplify-libcalls.ll @@ -175,6 +175,28 @@ define i32 @fake_toascii(i8 %x) { ret i32 %y } +declare double @pow(double, double) +declare double @exp2(double) + +; check to make sure only the correct libcall attributes are used +define double @fake_exp2(double %x) { +; CHECK-LABEL: @fake_exp2( +; CHECK-NEXT: [[Y:%.*]] = call double @exp2(double %x) +; CHECK-NEXT: ret double [[Y]] + + %y = call inreg double @pow(double inreg 2.0, double inreg %x) + ret double %y +} +define double 
@fake_ldexp(i32 %x) { +; CHECK-LABEL: @fake_ldexp( +; CHECK-NEXT: [[Z:%.*]] = call double @ldexp(double 1.0{{.*}}, i32 %x) +; CHECK-NEXT: ret double [[Z]] + + %y = sitofp i32 %x to double + %z = call inreg double @exp2(double %y) + ret double %z +} + attributes #0 = { nobuiltin } attributes #1 = { builtin } From 10aa0d7bbc12bf86958bc40943e37b46c6eed04a Mon Sep 17 00:00:00 2001 From: Dokyung Song Date: Mon, 13 Jul 2020 21:56:02 +0000 Subject: [PATCH 166/771] [compiler-rt] Fix compiler warnings and runtime errors in sanitizer RT strxfrm(_l) test cases. Summary: Fixed an implicit definition warning by including . Also fixed run-time assertions that the return value of strxfrm_l calls is less than the buffer size by increasing the size of the referenced buffer. Reviewers: morehouse Reviewed By: morehouse Subscribers: dberris, #sanitizers Tags: #sanitizers Differential Revision: https://reviews.llvm.org/D83593 --- compiler-rt/test/msan/__strxfrm_l.cpp | 2 +- compiler-rt/test/sanitizer_common/TestCases/Posix/strxfrm.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/compiler-rt/test/msan/__strxfrm_l.cpp b/compiler-rt/test/msan/__strxfrm_l.cpp index c4eb10efb3e0b..9766d33056857 100644 --- a/compiler-rt/test/msan/__strxfrm_l.cpp +++ b/compiler-rt/test/msan/__strxfrm_l.cpp @@ -10,7 +10,7 @@ extern "C" decltype(strxfrm_l) __strxfrm_l; int main(void) { - char q[10]; + char q[100]; locale_t loc = newlocale(LC_ALL_MASK, "", (locale_t)0); size_t n = __strxfrm_l(q, "qwerty", sizeof(q), loc); assert(n < sizeof(q)); diff --git a/compiler-rt/test/sanitizer_common/TestCases/Posix/strxfrm.c b/compiler-rt/test/sanitizer_common/TestCases/Posix/strxfrm.c index c28eb65b7d4f0..d08af1b3565fd 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Posix/strxfrm.c +++ b/compiler-rt/test/sanitizer_common/TestCases/Posix/strxfrm.c @@ -3,16 +3,16 @@ #include #include -#include +#include int main(int argc, char **argv) { char q[10]; size_t n = strxfrm(q, "abcdef", 
sizeof(q)); assert(n < sizeof(q)); - char q2[10]; + char q2[100]; locale_t loc = newlocale(LC_ALL_MASK, "", (locale_t)0); - n = strxfrm_l(q2, L"qwerty", sizeof(q), loc); + n = strxfrm_l(q2, "qwerty", sizeof(q2), loc); assert(n < sizeof(q2)); freelocale(loc); From 004bf35ba04873dc9ab1408bce0d6f2fed03b0ea Mon Sep 17 00:00:00 2001 From: Vedant Kumar Date: Mon, 13 Jul 2020 15:40:05 -0700 Subject: [PATCH 167/771] Update ubsan_interface.inc for D71491 This should address the bot failure here: http://lab.llvm.org:8011/builders/sanitizer-windows/builds/66309/ --- compiler-rt/lib/ubsan/ubsan_interface.inc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/compiler-rt/lib/ubsan/ubsan_interface.inc b/compiler-rt/lib/ubsan/ubsan_interface.inc index 1e44bc2171ded..503cc613b4544 100644 --- a/compiler-rt/lib/ubsan/ubsan_interface.inc +++ b/compiler-rt/lib/ubsan/ubsan_interface.inc @@ -27,6 +27,8 @@ INTERFACE_FUNCTION(__ubsan_handle_implicit_conversion) INTERFACE_FUNCTION(__ubsan_handle_implicit_conversion_abort) INTERFACE_FUNCTION(__ubsan_handle_invalid_builtin) INTERFACE_FUNCTION(__ubsan_handle_invalid_builtin_abort) +INTERFACE_FUNCTION(__ubsan_handle_objc_cast_check) +INTERFACE_FUNCTION(__ubsan_handle_objc_cast_check_abort) INTERFACE_FUNCTION(__ubsan_handle_load_invalid_value) INTERFACE_FUNCTION(__ubsan_handle_load_invalid_value_abort) INTERFACE_FUNCTION(__ubsan_handle_missing_return) From d9067dca7ba7cda97a86ec22106e06ffc700ecbf Mon Sep 17 00:00:00 2001 From: Kiran Chandramohan Date: Mon, 13 Jul 2020 23:13:04 +0100 Subject: [PATCH 168/771] Lowering of OpenMP Parallel operation to LLVM IR 1/n This patch introduces lowering of the OpenMP parallel operation to LLVM IR using the OpenMPIRBuilder. Functions topologicalSort and connectPhiNodes are generalised so that they work with operations also. connectPhiNodes is also made static. Lowering works for a parallel region with multiple blocks. Clauses and arguments of the OpenMP operation are not handled. 
Reviewed By: rriddle, anchu-rajendran Differential Revision: https://reviews.llvm.org/D81660 --- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 5 +- .../mlir/Target/LLVMIR/ModuleTranslation.h | 3 +- mlir/lib/Target/LLVMIR/ModuleTranslation.cpp | 231 ++++++++++++------ mlir/test/Target/openmp-llvm.mlir | 46 ++++ 4 files changed, 210 insertions(+), 75 deletions(-) diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 3be6c97322b58..642282f8af181 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -24,7 +24,6 @@ def OpenMP_Dialect : Dialect { class OpenMP_Op traits = []> : Op; - //===----------------------------------------------------------------------===// // 2.6 parallel Construct //===----------------------------------------------------------------------===// @@ -81,8 +80,8 @@ def ParallelOp : OpenMP_Op<"parallel", [AttrSizedOperandSegments]> { of the parallel region. }]; - let arguments = (ins Optional:$if_expr_var, - Optional:$num_threads_var, + let arguments = (ins Optional:$if_expr_var, + Optional:$num_threads_var, OptionalAttr:$default_val, Variadic:$private_vars, Variadic:$firstprivate_vars, diff --git a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h index 3a701018beb54..e44ae976e0dd0 100644 --- a/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h +++ b/mlir/include/mlir/Target/LLVMIR/ModuleTranslation.h @@ -87,6 +87,8 @@ class ModuleTranslation { llvm::IRBuilder<> &builder); virtual LogicalResult convertOmpOperation(Operation &op, llvm::IRBuilder<> &builder); + virtual LogicalResult convertOmpParallel(Operation &op, + llvm::IRBuilder<> &builder); static std::unique_ptr prepareLLVMModule(Operation *m); /// A helper to look up remapped operands in the value remapping table. 
@@ -100,7 +102,6 @@ class ModuleTranslation { LogicalResult convertFunctions(); LogicalResult convertGlobals(); LogicalResult convertOneFunction(LLVMFuncOp func); - void connectPHINodes(LLVMFuncOp func); LogicalResult convertBlock(Block &bb, bool ignoreArguments); llvm::Constant *getLLVMConstant(llvm::Type *llvmType, Attribute attr, diff --git a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp index 657aa84afe1c2..0defea6bbbb95 100644 --- a/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/ModuleTranslation.cpp @@ -25,11 +25,13 @@ #include "llvm/ADT/SetVector.h" #include "llvm/Frontend/OpenMP/OMPIRBuilder.h" #include "llvm/IR/BasicBlock.h" +#include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" using namespace mlir; @@ -304,7 +306,160 @@ ModuleTranslation::ModuleTranslation(Operation *module, assert(satisfiesLLVMModule(mlirModule) && "mlirModule should honor LLVM's module semantics."); } -ModuleTranslation::~ModuleTranslation() {} +ModuleTranslation::~ModuleTranslation() { + if (ompBuilder) + ompBuilder->finalize(); +} + +/// Get the SSA value passed to the current block from the terminator operation +/// of its predecessor. +static Value getPHISourceValue(Block *current, Block *pred, + unsigned numArguments, unsigned index) { + Operation &terminator = *pred->getTerminator(); + if (isa(terminator)) + return terminator.getOperand(index); + + // For conditional branches, we need to check if the current block is reached + // through the "true" or the "false" branch and take the relevant operands. 
+ auto condBranchOp = dyn_cast(terminator); + assert(condBranchOp && + "only branch operations can be terminators of a block that " + "has successors"); + assert((condBranchOp.getSuccessor(0) != condBranchOp.getSuccessor(1)) && + "successors with arguments in LLVM conditional branches must be " + "different blocks"); + + return condBranchOp.getSuccessor(0) == current + ? condBranchOp.trueDestOperands()[index] + : condBranchOp.falseDestOperands()[index]; +} + +/// Connect the PHI nodes to the results of preceding blocks. +template +static void +connectPHINodes(T &func, const DenseMap &valueMapping, + const DenseMap &blockMapping) { + // Skip the first block, it cannot be branched to and its arguments correspond + // to the arguments of the LLVM function. + for (auto it = std::next(func.begin()), eit = func.end(); it != eit; ++it) { + Block *bb = &*it; + llvm::BasicBlock *llvmBB = blockMapping.lookup(bb); + auto phis = llvmBB->phis(); + auto numArguments = bb->getNumArguments(); + assert(numArguments == std::distance(phis.begin(), phis.end())); + for (auto &numberedPhiNode : llvm::enumerate(phis)) { + auto &phiNode = numberedPhiNode.value(); + unsigned index = numberedPhiNode.index(); + for (auto *pred : bb->getPredecessors()) { + phiNode.addIncoming(valueMapping.lookup(getPHISourceValue( + bb, pred, numArguments, index)), + blockMapping.lookup(pred)); + } + } + } +} + +// TODO: implement an iterative version +static void topologicalSortImpl(llvm::SetVector &blocks, Block *b) { + blocks.insert(b); + for (Block *bb : b->getSuccessors()) { + if (blocks.count(bb) == 0) + topologicalSortImpl(blocks, bb); + } +} + +/// Sort function blocks topologically. +template +static llvm::SetVector topologicalSort(T &f) { + // For each blocks that has not been visited yet (i.e. that has no + // predecessors), add it to the list and traverse its successors in DFS + // preorder. 
+ llvm::SetVector blocks; + for (Block &b : f) { + if (blocks.count(&b) == 0) + topologicalSortImpl(blocks, &b); + } + assert(blocks.size() == f.getBlocks().size() && "some blocks are not sorted"); + + return blocks; +} + +/// Convert the OpenMP parallel Operation to LLVM IR. +LogicalResult +ModuleTranslation::convertOmpParallel(Operation &opInst, + llvm::IRBuilder<> &builder) { + using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy; + + auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, + llvm::BasicBlock &continuationIP) { + llvm::LLVMContext &llvmContext = llvmModule->getContext(); + + llvm::BasicBlock *codeGenIPBB = codeGenIP.getBlock(); + llvm::Instruction *codeGenIPBBTI = codeGenIPBB->getTerminator(); + + builder.SetInsertPoint(codeGenIPBB); + + for (auto ®ion : opInst.getRegions()) { + for (auto &bb : region) { + auto *llvmBB = llvm::BasicBlock::Create( + llvmContext, "omp.par.region", codeGenIP.getBlock()->getParent()); + blockMapping[&bb] = llvmBB; + } + + // Then, convert blocks one by one in topological order to ensure + // defs are converted before uses. + llvm::SetVector blocks = topologicalSort(region); + for (auto indexedBB : llvm::enumerate(blocks)) { + Block *bb = indexedBB.value(); + llvm::BasicBlock *curLLVMBB = blockMapping[bb]; + if (bb->isEntryBlock()) + codeGenIPBBTI->setSuccessor(0, curLLVMBB); + + // TODO: Error not returned up the hierarchy + if (failed( + convertBlock(*bb, /*ignoreArguments=*/indexedBB.index() == 0))) + return; + + // If this block has the terminator then add a jump to + // continuation bb + for (auto &op : *bb) { + if (isa(op)) { + builder.SetInsertPoint(curLLVMBB); + builder.CreateBr(&continuationIP); + } + } + } + // Finally, after all blocks have been traversed and values mapped, + // connect the PHI nodes to the results of preceding blocks. 
+ connectPHINodes(region, valueMapping, blockMapping); + } + }; + + // TODO: Perform appropriate actions according to the data-sharing + // attribute (shared, private, firstprivate, ...) of variables. + // Currently defaults to shared. + auto privCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP, + llvm::Value &vPtr, + llvm::Value *&replacementValue) -> InsertPointTy { + replacementValue = &vPtr; + + return codeGenIP; + }; + + // TODO: Perform finalization actions for variables. This has to be + // called for variables which have destructors/finalizers. + auto finiCB = [&](InsertPointTy codeGenIP) {}; + + // TODO: The various operands of parallel operation are not handled. + // Parallel operation is created with some default options for now. + llvm::Value *ifCond = nullptr; + llvm::Value *numThreads = nullptr; + bool isCancellable = false; + builder.restoreIP(ompBuilder->CreateParallel( + builder, bodyGenCB, privCB, finiCB, ifCond, numThreads, + llvm::omp::OMP_PROC_BIND_default, isCancellable)); + return success(); +} /// Given an OpenMP MLIR operation, create the corresponding LLVM IR /// (including OpenMP runtime calls). @@ -340,6 +495,9 @@ ModuleTranslation::convertOmpOperation(Operation &opInst, ompBuilder->CreateFlush(builder.saveIP()); return success(); }) + .Case([&](omp::TerminatorOp) { return success(); }) + .Case( + [&](omp::ParallelOp) { return convertOmpParallel(opInst, builder); }) .Default([&](Operation *inst) { return inst->emitError("unsupported OpenMP operation: ") << inst->getName(); @@ -556,75 +714,6 @@ LogicalResult ModuleTranslation::convertGlobals() { return success(); } -/// Get the SSA value passed to the current block from the terminator operation -/// of its predecessor. 
-static Value getPHISourceValue(Block *current, Block *pred, - unsigned numArguments, unsigned index) { - auto &terminator = *pred->getTerminator(); - if (isa(terminator)) { - return terminator.getOperand(index); - } - - // For conditional branches, we need to check if the current block is reached - // through the "true" or the "false" branch and take the relevant operands. - auto condBranchOp = dyn_cast(terminator); - assert(condBranchOp && - "only branch operations can be terminators of a block that " - "has successors"); - assert((condBranchOp.getSuccessor(0) != condBranchOp.getSuccessor(1)) && - "successors with arguments in LLVM conditional branches must be " - "different blocks"); - - return condBranchOp.getSuccessor(0) == current - ? condBranchOp.trueDestOperands()[index] - : condBranchOp.falseDestOperands()[index]; -} - -void ModuleTranslation::connectPHINodes(LLVMFuncOp func) { - // Skip the first block, it cannot be branched to and its arguments correspond - // to the arguments of the LLVM function. - for (auto it = std::next(func.begin()), eit = func.end(); it != eit; ++it) { - Block *bb = &*it; - llvm::BasicBlock *llvmBB = blockMapping.lookup(bb); - auto phis = llvmBB->phis(); - auto numArguments = bb->getNumArguments(); - assert(numArguments == std::distance(phis.begin(), phis.end())); - for (auto &numberedPhiNode : llvm::enumerate(phis)) { - auto &phiNode = numberedPhiNode.value(); - unsigned index = numberedPhiNode.index(); - for (auto *pred : bb->getPredecessors()) { - phiNode.addIncoming(valueMapping.lookup(getPHISourceValue( - bb, pred, numArguments, index)), - blockMapping.lookup(pred)); - } - } - } -} - -// TODO: implement an iterative version -static void topologicalSortImpl(llvm::SetVector &blocks, Block *b) { - blocks.insert(b); - for (Block *bb : b->getSuccessors()) { - if (blocks.count(bb) == 0) - topologicalSortImpl(blocks, bb); - } -} - -/// Sort function blocks topologically. 
-static llvm::SetVector topologicalSort(LLVMFuncOp f) { - // For each blocks that has not been visited yet (i.e. that has no - // predecessors), add it to the list and traverse its successors in DFS - // preorder. - llvm::SetVector blocks; - for (Block &b : f) { - if (blocks.count(&b) == 0) - topologicalSortImpl(blocks, &b); - } - assert(blocks.size() == f.getBlocks().size() && "some blocks are not sorted"); - - return blocks; -} - /// Attempts to add an attribute identified by `key`, optionally with the given /// `value` to LLVM function `llvmFunc`. Reports errors at `loc` if any. If the /// attribute has a kind known to LLVM IR, create the attribute of this kind, @@ -772,7 +861,7 @@ LogicalResult ModuleTranslation::convertOneFunction(LLVMFuncOp func) { // Finally, after all blocks have been traversed and values mapped, connect // the PHI nodes to the results of preceding blocks. - connectPHINodes(func); + connectPHINodes(func, valueMapping, blockMapping); return success(); } diff --git a/mlir/test/Target/openmp-llvm.mlir b/mlir/test/Target/openmp-llvm.mlir index ddfc2a4cf7868..c8acd8022b2bf 100644 --- a/mlir/test/Target/openmp-llvm.mlir +++ b/mlir/test/Target/openmp-llvm.mlir @@ -32,3 +32,49 @@ llvm.func @test_flush_construct(%arg0: !llvm.i32) { // CHECK-NEXT: ret void llvm.return } + +// CHECK-LABEL: define void @test_omp_parallel_1() +llvm.func @test_omp_parallel_1() -> () { + // CHECK: call void{{.*}}@__kmpc_fork_call{{.*}}@[[OMP_OUTLINED_FN_1:.*]] to {{.*}} + omp.parallel { + omp.barrier + omp.terminator + } + + llvm.return +} + +// CHECK: define internal void @[[OMP_OUTLINED_FN_1]] + // CHECK: call void @__kmpc_barrier + +llvm.func @body(!llvm.i64) + +// CHECK-LABEL: define void @test_omp_parallel_2() +llvm.func @test_omp_parallel_2() -> () { + // CHECK: call void{{.*}}@__kmpc_fork_call{{.*}}@[[OMP_OUTLINED_FN_2:.*]] to {{.*}} + omp.parallel { + ^bb0: + %0 = llvm.mlir.constant(1 : index) : !llvm.i64 + %1 = llvm.mlir.constant(42 : index) : !llvm.i64 + 
llvm.call @body(%0) : (!llvm.i64) -> () + llvm.call @body(%1) : (!llvm.i64) -> () + llvm.br ^bb1 + + ^bb1: + %2 = llvm.add %0, %1 : !llvm.i64 + llvm.call @body(%2) : (!llvm.i64) -> () + omp.terminator + } + llvm.return +} + +// CHECK: define internal void @[[OMP_OUTLINED_FN_2]] + // CHECK-LABEL: omp.par.region: + // CHECK: br label %omp.par.region1 + // CHECK-LABEL: omp.par.region1: + // CHECK: call void @body(i64 1) + // CHECK: call void @body(i64 42) + // CHECK: br label %omp.par.region2 + // CHECK-LABEL: omp.par.region2: + // CHECK: call void @body(i64 43) + // CHECK: br label %omp.par.pre_finalize From e958379581e5845572c21b8871873fcb0b15743e Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Mon, 13 Jul 2020 16:03:10 -0700 Subject: [PATCH 169/771] Fold the opt size check into the assert to silence an unused variable warning. --- llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp index e91828bd17078..3cd80cb04ab84 100644 --- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -182,9 +182,8 @@ namespace { "indirect-tls-seg-refs"); // OptFor[Min]Size are used in pattern predicates that isel is matching. 
- bool OptForSize = MF.getFunction().hasOptSize(); OptForMinSize = MF.getFunction().hasMinSize(); - assert((!OptForMinSize || OptForSize) && + assert((!OptForMinSize || MF.getFunction().hasOptSize()) && "OptForMinSize implies OptForSize"); SelectionDAGISel::runOnMachineFunction(MF); From 8d09f20798ac180b1749276bff364682ce0196ab Mon Sep 17 00:00:00 2001 From: Tyker Date: Tue, 14 Jul 2020 00:52:37 +0200 Subject: [PATCH 170/771] [AssumeBundles] Use operand bundles to encode alignment assumptions Summary: NOTE: There is a mailing list discussion on this: http://lists.llvm.org/pipermail/llvm-dev/2019-December/137632.html Complementary to the assumption outliner prototype in D71692, this patch shows how we could simplify the code emitted for an alignment assumption. The generated code is smaller, less fragile, and it makes it easier to recognize the additional use as an "assumption use". As mentioned in D71692 and on the mailing list, we could adopt this scheme, and similar schemes for other patterns, without adopting the assumption outlining. 
Reviewers: hfinkel, xbolva00, lebedev.ri, nikic, rjmccall, spatel, jdoerfert, sstefan1 Reviewed By: jdoerfert Subscribers: thopre, yamauchi, kuter, fhahn, merge_guards_bot, hiraditya, bollu, rkruppe, cfe-commits, llvm-commits Tags: #clang, #llvm Differential Revision: https://reviews.llvm.org/D71739 --- clang/lib/CodeGen/CodeGenFunction.cpp | 36 +++++- clang/test/CodeGen/align_value.cpp | 30 +---- clang/test/CodeGen/alloc-align-attr.c | 44 ++----- ...ssume-aligned-and-alloc-align-attributes.c | 8 +- clang/test/CodeGen/builtin-align-array.c | 32 ++--- clang/test/CodeGen/builtin-align.c | 24 +--- clang/test/CodeGen/builtin-assume-aligned.c | 32 +---- ...mption-attribute-align_value-on-lvalue.cpp | 8 +- ...tion-attribute-align_value-on-paramvar.cpp | 2 +- ...ibute-alloc_align-on-function-variable.cpp | 10 +- ...tion-attribute-alloc_align-on-function.cpp | 2 +- ...-assume_aligned-on-function-two-params.cpp | 10 +- ...n-attribute-assume_aligned-on-function.cpp | 2 +- ...n_assume_aligned-three-params-variable.cpp | 10 +- ...on-builtin_assume_aligned-three-params.cpp | 10 +- ...tion-builtin_assume_aligned-two-params.cpp | 8 +- .../catch-alignment-assumption-openmp.cpp | 8 +- .../non-power-of-2-alignment-assumptions.c | 13 +- clang/test/OpenMP/simd_codegen.cpp | 16 --- clang/test/OpenMP/simd_metadata.c | 117 +++++++---------- ...s_distribute_parallel_for_simd_codegen.cpp | 5 +- llvm/include/llvm/IR/IRBuilder.h | 28 ++-- .../Scalar/AlignmentFromAssumptions.h | 6 +- llvm/lib/Analysis/AssumeBundleQueries.cpp | 13 +- llvm/lib/IR/IRBuilder.cpp | 77 ++++------- llvm/lib/IR/Verifier.cpp | 23 +++- .../InstCombine/InstCombineCalls.cpp | 15 ++- .../Scalar/AlignmentFromAssumptions.cpp | 121 +++++------------- .../AlignmentFromAssumptions/simple.ll | 75 ++++------- .../AlignmentFromAssumptions/simple32.ll | 114 ++++------------- llvm/test/Transforms/Inline/align.ll | 15 +-- llvm/test/Transforms/InstCombine/assume.ll | 1 + .../inlining-alignment-assumptions.ll | 27 +--- 
llvm/test/Verifier/assume-bundles.ll | 16 ++- .../Analysis/AssumeBundleQueriesTest.cpp | 38 ++++++ 35 files changed, 369 insertions(+), 627 deletions(-) diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index 8ce488f35dd32..4a7c84562deef 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -2154,13 +2154,39 @@ void CodeGenFunction::emitAlignmentAssumption(llvm::Value *PtrValue, SourceLocation AssumptionLoc, llvm::Value *Alignment, llvm::Value *OffsetValue) { - llvm::Value *TheCheck; - llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption( - CGM.getDataLayout(), PtrValue, Alignment, OffsetValue, &TheCheck); + if (Alignment->getType() != IntPtrTy) + Alignment = + Builder.CreateIntCast(Alignment, IntPtrTy, false, "casted.align"); + if (OffsetValue && OffsetValue->getType() != IntPtrTy) + OffsetValue = + Builder.CreateIntCast(OffsetValue, IntPtrTy, true, "casted.offset"); + llvm::Value *TheCheck = nullptr; if (SanOpts.has(SanitizerKind::Alignment)) { - emitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, Alignment, - OffsetValue, TheCheck, Assumption); + llvm::Value *PtrIntValue = + Builder.CreatePtrToInt(PtrValue, IntPtrTy, "ptrint"); + + if (OffsetValue) { + bool IsOffsetZero = false; + if (const auto *CI = dyn_cast(OffsetValue)) + IsOffsetZero = CI->isZero(); + + if (!IsOffsetZero) + PtrIntValue = Builder.CreateSub(PtrIntValue, OffsetValue, "offsetptr"); + } + + llvm::Value *Zero = llvm::ConstantInt::get(IntPtrTy, 0); + llvm::Value *Mask = + Builder.CreateSub(Alignment, llvm::ConstantInt::get(IntPtrTy, 1)); + llvm::Value *MaskedPtr = Builder.CreateAnd(PtrIntValue, Mask, "maskedptr"); + TheCheck = Builder.CreateICmpEQ(MaskedPtr, Zero, "maskcond"); } + llvm::Instruction *Assumption = Builder.CreateAlignmentAssumption( + CGM.getDataLayout(), PtrValue, Alignment, OffsetValue); + + if (!SanOpts.has(SanitizerKind::Alignment)) + return; + 
emitAlignmentAssumptionCheck(PtrValue, Ty, Loc, AssumptionLoc, Alignment, + OffsetValue, TheCheck, Assumption); } void CodeGenFunction::emitAlignmentAssumption(llvm::Value *PtrValue, diff --git a/clang/test/CodeGen/align_value.cpp b/clang/test/CodeGen/align_value.cpp index acbfbaf2ba5c7..a18cb651fe4c0 100644 --- a/clang/test/CodeGen/align_value.cpp +++ b/clang/test/CodeGen/align_value.cpp @@ -29,10 +29,7 @@ struct ad_struct { // CHECK-NEXT: [[TMP0:%.*]] = load %struct.ad_struct*, %struct.ad_struct** [[X_ADDR]], align 8 // CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_AD_STRUCT:%.*]], %struct.ad_struct* [[TMP0]], i32 0, i32 0 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[A]], align 8 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint double* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[TMP1]], i64 64) ] // CHECK-NEXT: ret double* [[TMP1]] // double *foo(ad_struct& x) { @@ -48,10 +45,7 @@ double *foo(ad_struct& x) { // CHECK-NEXT: [[TMP0:%.*]] = load %struct.ad_struct*, %struct.ad_struct** [[X_ADDR]], align 8 // CHECK-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_AD_STRUCT:%.*]], %struct.ad_struct* [[TMP0]], i32 0, i32 0 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[A]], align 8 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint double* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[TMP1]], i64 64) ] // CHECK-NEXT: ret double* [[TMP1]] // double *goo(ad_struct *x) { @@ -66,10 +60,7 @@ double *goo(ad_struct *x) { // CHECK-NEXT: store double** [[X]], double*** [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double**, 
double*** [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP0]], align 8 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint double* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[TMP1]], i64 64) ] // CHECK-NEXT: ret double* [[TMP1]] // double *bar(aligned_double *x) { @@ -84,10 +75,7 @@ double *bar(aligned_double *x) { // CHECK-NEXT: store double** [[X]], double*** [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load double**, double*** [[X_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[TMP0]], align 8 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint double* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[TMP1]], i64 64) ] // CHECK-NEXT: ret double* [[TMP1]] // double *car(aligned_double &x) { @@ -103,10 +91,7 @@ double *car(aligned_double &x) { // CHECK-NEXT: [[TMP0:%.*]] = load double**, double*** [[X_ADDR]], align 8 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double*, double** [[TMP0]], i64 5 // CHECK-NEXT: [[TMP1:%.*]] = load double*, double** [[ARRAYIDX]], align 8 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint double* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[TMP1]], i64 64) ] // CHECK-NEXT: ret double* [[TMP1]] // double *dar(aligned_double *x) { @@ -118,10 +103,7 @@ aligned_double eep(); // CHECK-LABEL: define {{[^@]+}}@_Z3retv() #0 // CHECK-NEXT: entry: // CHECK-NEXT: 
[[CALL:%.*]] = call double* @_Z3eepv() -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint double* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[CALL]], i64 64) ] // CHECK-NEXT: ret double* [[CALL]] // double *ret() { diff --git a/clang/test/CodeGen/alloc-align-attr.c b/clang/test/CodeGen/alloc-align-attr.c index 9517c50dbb1db..44a57291b47c8 100644 --- a/clang/test/CodeGen/alloc-align-attr.c +++ b/clang/test/CodeGen/alloc-align-attr.c @@ -11,12 +11,8 @@ __INT32_TYPE__*m1(__INT32_TYPE__ i) __attribute__((alloc_align(1))); // CHECK-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK-NEXT: [[CALL:%.*]] = call i32* @m1(i32 [[TMP0]]) -// CHECK-NEXT: [[ALIGNMENTCAST:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[ALIGNMENTCAST]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 [[CASTED_ALIGN]]) ] // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP1]] // @@ -32,12 +28,8 @@ __INT32_TYPE__ test1(__INT32_TYPE__ a) { // CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[A_ADDR]], align 8 // CHECK-NEXT: [[CONV:%.*]] = trunc i64 [[TMP0]] to i32 // CHECK-NEXT: [[CALL:%.*]] = call i32* @m1(i32 [[CONV]]) -// CHECK-NEXT: [[ALIGNMENTCAST:%.*]] = zext i32 [[CONV]] to i64 -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[ALIGNMENTCAST]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: 
[[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = zext i32 [[CONV]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 [[CASTED_ALIGN]]) ] // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP1]] // @@ -55,11 +47,7 @@ __INT32_TYPE__ *m2(__SIZE_TYPE__ i) __attribute__((alloc_align(1))); // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 // CHECK-NEXT: [[CALL:%.*]] = call i32* @m2(i64 [[CONV]]) -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[CONV]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 [[CONV]]) ] // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP1]] // @@ -75,11 +63,7 @@ __INT32_TYPE__ test3(__INT32_TYPE__ a) { // CHECK-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[A_ADDR]], align 8 // CHECK-NEXT: [[CALL:%.*]] = call i32* @m2(i64 [[TMP0]]) -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[TMP0]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 [[TMP0]]) ] // CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP1]] // @@ -115,12 +99,8 @@ __INT32_TYPE__ *m3(struct Empty s, __int128_t i) 
__attribute__((alloc_align(2))) // CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* [[TMP4]], i32 0, i32 1 // CHECK-NEXT: [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8 // CHECK-NEXT: [[CALL:%.*]] = call i32* @m3(i64 [[TMP6]], i64 [[TMP8]]) -// CHECK-NEXT: [[ALIGNMENTCAST:%.*]] = trunc i128 [[TMP3]] to i64 -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[ALIGNMENTCAST]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP3]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 [[CASTED_ALIGN]]) ] // CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP9]] // @@ -157,12 +137,8 @@ __INT32_TYPE__ *m4(struct MultiArgs s, __int128_t i) __attribute__((alloc_align( // CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds { i64, i64 }, { i64, i64 }* [[TMP9]], i32 0, i32 1 // CHECK-NEXT: [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8 // CHECK-NEXT: [[CALL:%.*]] = call i32* @m4(i64 [[TMP6]], i64 [[TMP8]], i64 [[TMP11]], i64 [[TMP13]]) -// CHECK-NEXT: [[ALIGNMENTCAST:%.*]] = trunc i128 [[TMP3]] to i64 -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[ALIGNMENTCAST]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: [[CASTED_ALIGN:%.*]] = trunc i128 [[TMP3]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 [[CASTED_ALIGN]]) ] // CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP14]] // diff --git a/clang/test/CodeGen/assume-aligned-and-alloc-align-attributes.c 
b/clang/test/CodeGen/assume-aligned-and-alloc-align-attributes.c index fa4ee8db12e7f..cd8a6f19b4f49 100644 --- a/clang/test/CodeGen/assume-aligned-and-alloc-align-attributes.c +++ b/clang/test/CodeGen/assume-aligned-and-alloc-align-attributes.c @@ -36,12 +36,8 @@ void *t2_immediate2() { // CHECK-NEXT: store i32 [[ALIGNMENT:%.*]], i32* [[ALIGNMENT_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ALIGNMENT_ADDR]], align 4 // CHECK-NEXT: [[CALL:%.*]] = call align 32 i8* @my_aligned_alloc(i32 320, i32 [[TMP0]]) -// CHECK-NEXT: [[ALIGNMENTCAST:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[ALIGNMENTCAST]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[CALL]], i64 [[TMP1]]) ] // CHECK-NEXT: ret i8* [[CALL]] // void *t3_variable(int alignment) { diff --git a/clang/test/CodeGen/builtin-align-array.c b/clang/test/CodeGen/builtin-align-array.c index 97235c33b7fbe..31f7b42b56170 100644 --- a/clang/test/CodeGen/builtin-align-array.c +++ b/clang/test/CodeGen/builtin-align-array.c @@ -4,7 +4,7 @@ extern int func(char *c); -// CHECK-LABEL: define {{[^@]+}}@test_array() #0 +// CHECK-LABEL: @test_array( // CHECK-NEXT: entry: // CHECK-NEXT: [[BUF:%.*]] = alloca [1024 x i8], align 16 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BUF]], i64 0, i64 44 @@ -12,10 +12,7 @@ extern int func(char *c); // CHECK-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[INTPTR]], -16 // CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[INTPTR]] // CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, i8* [[ARRAYIDX]], i64 [[DIFF]] -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[ALIGNED_RESULT]] to 
i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 15 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[ALIGNED_RESULT]], i64 16) ] // CHECK-NEXT: [[CALL:%.*]] = call i32 @func(i8* [[ALIGNED_RESULT]]) // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BUF]], i64 0, i64 22 // CHECK-NEXT: [[INTPTR2:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 @@ -23,13 +20,10 @@ extern int func(char *c); // CHECK-NEXT: [[ALIGNED_INTPTR4:%.*]] = and i64 [[OVER_BOUNDARY]], -32 // CHECK-NEXT: [[DIFF5:%.*]] = sub i64 [[ALIGNED_INTPTR4]], [[INTPTR2]] // CHECK-NEXT: [[ALIGNED_RESULT6:%.*]] = getelementptr inbounds i8, i8* [[ARRAYIDX1]], i64 [[DIFF5]] -// CHECK-NEXT: [[PTRINT7:%.*]] = ptrtoint i8* [[ALIGNED_RESULT6]] to i64 -// CHECK-NEXT: [[MASKEDPTR8:%.*]] = and i64 [[PTRINT7]], 31 -// CHECK-NEXT: [[MASKCOND9:%.*]] = icmp eq i64 [[MASKEDPTR8]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND9]]) -// CHECK-NEXT: [[CALL10:%.*]] = call i32 @func(i8* [[ALIGNED_RESULT6]]) -// CHECK-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BUF]], i64 0, i64 16 -// CHECK-NEXT: [[SRC_ADDR:%.*]] = ptrtoint i8* [[ARRAYIDX11]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[ALIGNED_RESULT6]], i64 32) ] +// CHECK-NEXT: [[CALL7:%.*]] = call i32 @func(i8* [[ALIGNED_RESULT6]]) +// CHECK-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BUF]], i64 0, i64 16 +// CHECK-NEXT: [[SRC_ADDR:%.*]] = ptrtoint i8* [[ARRAYIDX8]] to i64 // CHECK-NEXT: [[SET_BITS:%.*]] = and i64 [[SRC_ADDR]], 63 // CHECK-NEXT: [[IS_ALIGNED:%.*]] = icmp eq i64 [[SET_BITS]], 0 // CHECK-NEXT: [[CONV:%.*]] = zext i1 [[IS_ALIGNED]] to i32 @@ -42,7 +36,7 @@ int test_array(void) { return __builtin_is_aligned(&buf[16], 64); } -// CHECK-LABEL: define {{[^@]+}}@test_array_should_not_mask() #0 +// 
CHECK-LABEL: @test_array_should_not_mask( // CHECK-NEXT: entry: // CHECK-NEXT: [[BUF:%.*]] = alloca [1024 x i8], align 32 // CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BUF]], i64 0, i64 64 @@ -50,10 +44,7 @@ int test_array(void) { // CHECK-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[INTPTR]], -16 // CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[INTPTR]] // CHECK-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, i8* [[ARRAYIDX]], i64 [[DIFF]] -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[ALIGNED_RESULT]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 15 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[ALIGNED_RESULT]], i64 16) ] // CHECK-NEXT: [[CALL:%.*]] = call i32 @func(i8* [[ALIGNED_RESULT]]) // CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BUF]], i64 0, i64 32 // CHECK-NEXT: [[INTPTR2:%.*]] = ptrtoint i8* [[ARRAYIDX1]] to i64 @@ -61,11 +52,8 @@ int test_array(void) { // CHECK-NEXT: [[ALIGNED_INTPTR4:%.*]] = and i64 [[OVER_BOUNDARY]], -32 // CHECK-NEXT: [[DIFF5:%.*]] = sub i64 [[ALIGNED_INTPTR4]], [[INTPTR2]] // CHECK-NEXT: [[ALIGNED_RESULT6:%.*]] = getelementptr inbounds i8, i8* [[ARRAYIDX1]], i64 [[DIFF5]] -// CHECK-NEXT: [[PTRINT7:%.*]] = ptrtoint i8* [[ALIGNED_RESULT6]] to i64 -// CHECK-NEXT: [[MASKEDPTR8:%.*]] = and i64 [[PTRINT7]], 31 -// CHECK-NEXT: [[MASKCOND9:%.*]] = icmp eq i64 [[MASKEDPTR8]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND9]]) -// CHECK-NEXT: [[CALL10:%.*]] = call i32 @func(i8* [[ALIGNED_RESULT6]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[ALIGNED_RESULT6]], i64 32) ] +// CHECK-NEXT: [[CALL7:%.*]] = call i32 @func(i8* [[ALIGNED_RESULT6]]) // CHECK-NEXT: ret i32 1 // int test_array_should_not_mask(void) { diff --git a/clang/test/CodeGen/builtin-align.c 
b/clang/test/CodeGen/builtin-align.c index 7e66e2b5c0b9b..60f7fc99c1d4d 100644 --- a/clang/test/CodeGen/builtin-align.c +++ b/clang/test/CodeGen/builtin-align.c @@ -122,11 +122,7 @@ _Bool is_aligned(TYPE ptr, unsigned align) { // CHECK-VOID_PTR-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[OVER_BOUNDARY]], [[INVERTED_MASK]] // CHECK-VOID_PTR-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[INTPTR]] // CHECK-VOID_PTR-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[DIFF]] -// CHECK-VOID_PTR-NEXT: [[MASK1:%.*]] = sub i64 [[ALIGNMENT]], 1 -// CHECK-VOID_PTR-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[ALIGNED_RESULT]] to i64 -// CHECK-VOID_PTR-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK1]] -// CHECK-VOID_PTR-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-VOID_PTR-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-VOID_PTR-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[ALIGNED_RESULT]], i64 [[ALIGNMENT]]) ] // CHECK-VOID_PTR-NEXT: ret i8* [[ALIGNED_RESULT]] // // CHECK-FLOAT_PTR-LABEL: define {{[^@]+}}@align_up @@ -142,11 +138,7 @@ _Bool is_aligned(TYPE ptr, unsigned align) { // CHECK-FLOAT_PTR-NEXT: [[TMP0:%.*]] = bitcast float* [[PTR]] to i8* // CHECK-FLOAT_PTR-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 [[DIFF]] // CHECK-FLOAT_PTR-NEXT: [[TMP1:%.*]] = bitcast i8* [[ALIGNED_RESULT]] to float* -// CHECK-FLOAT_PTR-NEXT: [[MASK1:%.*]] = sub i64 [[ALIGNMENT]], 1 -// CHECK-FLOAT_PTR-NEXT: [[PTRINT:%.*]] = ptrtoint float* [[TMP1]] to i64 -// CHECK-FLOAT_PTR-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK1]] -// CHECK-FLOAT_PTR-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-FLOAT_PTR-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-FLOAT_PTR-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP1]], i64 [[ALIGNMENT]]) ] // CHECK-FLOAT_PTR-NEXT: ret float* [[TMP1]] // // CHECK-LONG-LABEL: define {{[^@]+}}@align_up @@ -184,11 +176,7 @@ TYPE align_up(TYPE 
ptr, unsigned align) { // CHECK-VOID_PTR-NEXT: [[ALIGNED_INTPTR:%.*]] = and i64 [[INTPTR]], [[INVERTED_MASK]] // CHECK-VOID_PTR-NEXT: [[DIFF:%.*]] = sub i64 [[ALIGNED_INTPTR]], [[INTPTR]] // CHECK-VOID_PTR-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, i8* [[PTR]], i64 [[DIFF]] -// CHECK-VOID_PTR-NEXT: [[MASK1:%.*]] = sub i64 [[ALIGNMENT]], 1 -// CHECK-VOID_PTR-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[ALIGNED_RESULT]] to i64 -// CHECK-VOID_PTR-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK1]] -// CHECK-VOID_PTR-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-VOID_PTR-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-VOID_PTR-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[ALIGNED_RESULT]], i64 [[ALIGNMENT]]) ] // CHECK-VOID_PTR-NEXT: ret i8* [[ALIGNED_RESULT]] // // CHECK-FLOAT_PTR-LABEL: define {{[^@]+}}@align_down @@ -203,11 +191,7 @@ TYPE align_up(TYPE ptr, unsigned align) { // CHECK-FLOAT_PTR-NEXT: [[TMP0:%.*]] = bitcast float* [[PTR]] to i8* // CHECK-FLOAT_PTR-NEXT: [[ALIGNED_RESULT:%.*]] = getelementptr inbounds i8, i8* [[TMP0]], i64 [[DIFF]] // CHECK-FLOAT_PTR-NEXT: [[TMP1:%.*]] = bitcast i8* [[ALIGNED_RESULT]] to float* -// CHECK-FLOAT_PTR-NEXT: [[MASK1:%.*]] = sub i64 [[ALIGNMENT]], 1 -// CHECK-FLOAT_PTR-NEXT: [[PTRINT:%.*]] = ptrtoint float* [[TMP1]] to i64 -// CHECK-FLOAT_PTR-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK1]] -// CHECK-FLOAT_PTR-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-FLOAT_PTR-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-FLOAT_PTR-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[TMP1]], i64 [[ALIGNMENT]]) ] // CHECK-FLOAT_PTR-NEXT: ret float* [[TMP1]] // // CHECK-LONG-LABEL: define {{[^@]+}}@align_down diff --git a/clang/test/CodeGen/builtin-assume-aligned.c b/clang/test/CodeGen/builtin-assume-aligned.c index 90693cc215200..b9f1ebfbdcf58 100644 --- a/clang/test/CodeGen/builtin-assume-aligned.c +++ b/clang/test/CodeGen/builtin-assume-aligned.c @@ 
-8,10 +8,7 @@ // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[TMP1]], i64 32, i64 0) ] // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* // CHECK-NEXT: store i32* [[TMP2]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8 @@ -31,10 +28,7 @@ int test1(int *a) { // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[TMP1]], i64 32, i64 0) ] // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* // CHECK-NEXT: store i32* [[TMP2]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8 @@ -54,10 +48,7 @@ int test2(int *a) { // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[TMP1]], i64 
32) ] // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* // CHECK-NEXT: store i32* [[TMP2]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8 @@ -81,11 +72,7 @@ int test3(int *a) { // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* // CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP2]] to i64 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[TMP1]] to i64 -// CHECK-NEXT: [[OFFSETPTR:%.*]] = sub i64 [[PTRINT]], [[CONV]] -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[OFFSETPTR]], 31 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[TMP1]], i64 32, i64 [[CONV]]) ] // CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP1]] to i32* // CHECK-NEXT: store i32* [[TMP3]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP4:%.*]] = load i32*, i32** [[A_ADDR]], align 8 @@ -115,11 +102,7 @@ int *m2() __attribute__((assume_aligned(64, 12))); // CHECK-LABEL: define {{[^@]+}}@test6() #0 // CHECK-NEXT: entry: // CHECK-NEXT: [[CALL:%.*]] = call i32* (...) 
@m2() -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[CALL]] to i64 -// CHECK-NEXT: [[OFFSETPTR:%.*]] = sub i64 [[PTRINT]], 12 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[OFFSETPTR]], 63 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[CALL]], i64 64, i64 12) ] // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[CALL]], align 4 // CHECK-NEXT: ret i32 [[TMP0]] // @@ -134,10 +117,7 @@ int test6() { // CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to i8* -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[TMP1]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 536870911 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[TMP1]], i64 536870912) ] // CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to i32* // CHECK-NEXT: store i32* [[TMP2]], i32** [[A_ADDR]], align 8 // CHECK-NEXT: [[TMP3:%.*]] = load i32*, i32** [[A_ADDR]], align 8 diff --git a/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-lvalue.cpp b/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-lvalue.cpp index 96d264190bec7..fb2b1a76116e9 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-lvalue.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-lvalue.cpp @@ -21,9 +21,9 @@ char **load_from_ac_struct(struct ac_struct *x) { // CHECK-NEXT: %[[X_RELOADED:.*]] = load %[[STRUCT_AC_STRUCT]]*, %[[STRUCT_AC_STRUCT]]** %[[STRUCT_AC_STRUCT_ADDR]], align 8 // CHECK: %[[A_ADDR:.*]] = getelementptr inbounds %[[STRUCT_AC_STRUCT]], %[[STRUCT_AC_STRUCT]]* %[[X_RELOADED]], i32 0, i32 0 // CHECK: %[[A:.*]] = load i8**, i8*** 
%[[A_ADDR]], align 8 - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[A]] to i64 - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 2147483647 - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[A]] to i64 + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 2147483647 + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8** %[[A]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -32,7 +32,7 @@ char **load_from_ac_struct(struct ac_struct *x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8** %[[A]], i64 2147483648) ] // CHECK-NEXT: ret i8** %[[A]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-paramvar.cpp b/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-paramvar.cpp index 0e3fa750c66c3..46f7d09ae2aa5 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-paramvar.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-attribute-align_value-on-paramvar.cpp @@ -24,7 +24,7 @@ char **passthrough(__attribute__((align_value(0x80000000))) char **x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-SANITIZE-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-SANITIZE-NEXT: call void @llvm.assume(i1 true) [ "align"(i8** %[[X_RELOADED]], i64 2147483648) ] // CHECK-NEXT: ret 
i8** %[[X_RELOADED]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp b/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp index 591eaa0e13131..40abbc3871996 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function-variable.cpp @@ -30,10 +30,10 @@ char **caller(char **x, unsigned long alignment) { // CHECK-NEXT: %[[X_RELOADED:.*]] = load i8**, i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[ALIGNMENT_RELOADED:.*]] = load i64, i64* %[[ALIGNMENT_ADDR]], align 8 // CHECK-NEXT: %[[X_RETURNED:.*]] = call i8** @[[PASSTHROUGH]](i8** %[[X_RELOADED]], i64 %[[ALIGNMENT_RELOADED]]) - // CHECK-NEXT: %[[MASK:.*]] = sub i64 %[[ALIGNMENT_RELOADED]], 1 - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64 - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], %[[MASK]] - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64 + // CHECK-SANITIZE-NEXT: %[[MASK:.*]] = sub i64 %[[ALIGNMENT_RELOADED]], 1 + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], %[[MASK]] + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -42,7 +42,7 @@ char **caller(char **x, unsigned long alignment) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: 
call void @llvm.assume(i1 true) [ "align"(i8** %[[X_RETURNED]], i64 %1) ] // CHECK-NEXT: ret i8** %[[X_RETURNED]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function.cpp b/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function.cpp index a41357933f918..87d903c69716c 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-attribute-alloc_align-on-function.cpp @@ -39,7 +39,7 @@ char **caller(char **x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-SANITIZE-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-SANITIZE-NEXT: call void @llvm.assume(i1 true) [ "align"(i8** %[[X_RETURNED]], i64 128) ] // CHECK-NEXT: ret i8** %[[X_RETURNED]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function-two-params.cpp b/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function-two-params.cpp index e78667ce16e06..ecc96bcf6a53b 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function-two-params.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function-two-params.cpp @@ -24,10 +24,10 @@ char **caller(char **x) { // CHECK-NEXT: store i8** %[[X]], i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[X_RELOADED:.*]] = load i8**, i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[X_RETURNED:.*]] = call i8** @[[PASSTHROUGH]](i8** %[[X_RELOADED]]) - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64 - // CHECK-NEXT: %[[OFFSETPTR:.*]] = sub i64 %[[PTRINT]], 42 - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[OFFSETPTR]], 2147483647 - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // 
CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64 + // CHECK-SANITIZE-NEXT: %[[OFFSETPTR:.*]] = sub i64 %[[PTRINT]], 42 + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[OFFSETPTR]], 2147483647 + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8** %[[X_RETURNED]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -36,7 +36,7 @@ char **caller(char **x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8** %[[X_RETURNED]], i64 2147483648, i64 42) ] // CHECK-NEXT: ret i8** %[[X_RETURNED]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function.cpp b/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function.cpp index f750bbd77d42f..5bbc5843b89f8 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-attribute-assume_aligned-on-function.cpp @@ -36,7 +36,7 @@ char **caller(char **x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-SANITIZE-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-SANITIZE-NEXT: call void @llvm.assume(i1 true) [ "align"(i8** %[[X_RETURNED]], i64 128) ] // CHECK-NEXT: ret i8** %[[X_RETURNED]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params-variable.cpp 
b/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params-variable.cpp index 4306e322f5fb6..9c8944ba280b4 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params-variable.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params-variable.cpp @@ -16,10 +16,10 @@ void *caller(char **x, unsigned long offset) { // CHECK-NEXT: %[[X_RELOADED:.*]] = load i8**, i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[BITCAST:.*]] = bitcast i8** %[[X_RELOADED]] to i8* // CHECK-NEXT: %[[OFFSET_RELOADED:.*]] = load i64, i64* %[[OFFSET_ADDR]], align 8 - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[BITCAST]] to i64 - // CHECK-NEXT: %[[OFFSETPTR:.*]] = sub i64 %[[PTRINT]], %[[OFFSET_RELOADED]] - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[OFFSETPTR]], 536870911 - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[BITCAST]] to i64 + // CHECK-SANITIZE-NEXT: %[[OFFSETPTR:.*]] = sub i64 %[[PTRINT]], %[[OFFSET_RELOADED]] + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[OFFSETPTR]], 536870911 + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8* %[[BITCAST]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -28,7 +28,7 @@ void *caller(char **x, unsigned long offset) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* %[[BITCAST]], i64 536870912, i64 %[[OFFSET_RELOADED]]) ] // CHECK-NEXT: ret i8* %[[BITCAST]] // CHECK-NEXT: } #line 100 diff --git 
a/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params.cpp b/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params.cpp index 27f53e92bed89..9f61e08106a01 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-three-params.cpp @@ -13,10 +13,10 @@ void *caller(char **x) { // CHECK-NEXT: store i8** %[[X]], i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[X_RELOADED:.*]] = load i8**, i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[BITCAST:.*]] = bitcast i8** %[[X_RELOADED]] to i8* - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[BITCAST]] to i64 - // CHECK-NEXT: %[[OFFSETPTR:.*]] = sub i64 %[[PTRINT]], 42 - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[OFFSETPTR]], 536870911 - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[BITCAST]] to i64 + // CHECK-SANITIZE-NEXT: %[[OFFSETPTR:.*]] = sub i64 %[[PTRINT]], 42 + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[OFFSETPTR]], 536870911 + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8* %[[BITCAST]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -25,7 +25,7 @@ void *caller(char **x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* %[[BITCAST]], i64 536870912, i64 42) ] // CHECK-NEXT: ret i8* %[[BITCAST]] // CHECK-NEXT: } #line 100 diff --git 
a/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-two-params.cpp b/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-two-params.cpp index 5412270f37619..20bed646ff951 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-two-params.cpp +++ b/clang/test/CodeGen/catch-alignment-assumption-builtin_assume_aligned-two-params.cpp @@ -13,9 +13,9 @@ void *caller(char **x) { // CHECK-NEXT: store i8** %[[X]], i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[X_RELOADED:.*]] = load i8**, i8*** %[[X_ADDR]], align 8 // CHECK-NEXT: %[[BITCAST:.*]] = bitcast i8** %[[X_RELOADED]] to i8* - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[BITCAST]] to i64 - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 536870911 - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[BITCAST]] to i64 + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 536870911 + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8* %[[BITCAST]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -24,7 +24,7 @@ void *caller(char **x) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* %[[BITCAST]], i64 536870912) ] // CHECK-NEXT: ret i8* %[[BITCAST]] // CHECK-NEXT: } #line 100 diff --git a/clang/test/CodeGen/catch-alignment-assumption-openmp.cpp b/clang/test/CodeGen/catch-alignment-assumption-openmp.cpp index 6d75ee0858dac..353f2fd7f17bd 100644 --- a/clang/test/CodeGen/catch-alignment-assumption-openmp.cpp 
+++ b/clang/test/CodeGen/catch-alignment-assumption-openmp.cpp @@ -12,9 +12,9 @@ void func(char *data) { // CHECK-NEXT: %[[DATA_ADDR:.*]] = alloca i8*, align 8 // CHECK: store i8* %[[DATA]], i8** %[[DATA_ADDR]], align 8 // CHECK: %[[DATA_RELOADED:.*]] = load i8*, i8** %[[DATA_ADDR]], align 8 - // CHECK-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[DATA_RELOADED]] to i64 - // CHECK-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 1073741823 - // CHECK-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 + // CHECK-SANITIZE-NEXT: %[[PTRINT:.*]] = ptrtoint i8* %[[DATA_RELOADED]] to i64 + // CHECK-SANITIZE-NEXT: %[[MASKEDPTR:.*]] = and i64 %[[PTRINT]], 1073741823 + // CHECK-SANITIZE-NEXT: %[[MASKCOND:.*]] = icmp eq i64 %[[MASKEDPTR]], 0 // CHECK-SANITIZE-NEXT: %[[PTRINT_DUP:.*]] = ptrtoint i8* %[[DATA_RELOADED]] to i64, !nosanitize // CHECK-SANITIZE-NEXT: br i1 %[[MASKCOND]], label %[[CONT:.*]], label %[[HANDLER_ALIGNMENT_ASSUMPTION:[^,]+]],{{.*}} !nosanitize // CHECK-SANITIZE: [[HANDLER_ALIGNMENT_ASSUMPTION]]: @@ -23,7 +23,7 @@ void func(char *data) { // CHECK-SANITIZE-TRAP-NEXT: call void @llvm.trap(){{.*}}, !nosanitize // CHECK-SANITIZE-UNREACHABLE-NEXT: unreachable, !nosanitize // CHECK-SANITIZE: [[CONT]]: - // CHECK-NEXT: call void @llvm.assume(i1 %[[MASKCOND]]) + // CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* %[[DATA_RELOADED]], i64 1073741824) ] #line 100 #pragma omp for simd aligned(data : 0x40000000) diff --git a/clang/test/CodeGen/non-power-of-2-alignment-assumptions.c b/clang/test/CodeGen/non-power-of-2-alignment-assumptions.c index 9467f6228dfc4..b8ce1699f7ed0 100644 --- a/clang/test/CodeGen/non-power-of-2-alignment-assumptions.c +++ b/clang/test/CodeGen/non-power-of-2-alignment-assumptions.c @@ -9,12 +9,8 @@ void *__attribute__((alloc_align(1))) alloc(int align); // CHECK-NEXT: store i32 [[ALIGN:%.*]], i32* [[ALIGN_ADDR]], align 4 // CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ALIGN_ADDR]], align 4 // CHECK-NEXT: [[CALL:%.*]] = call i8* @alloc(i32 
[[TMP0]]) -// CHECK-NEXT: [[ALIGNMENTCAST:%.*]] = zext i32 [[TMP0]] to i64 -// CHECK-NEXT: [[MASK:%.*]] = sub i64 [[ALIGNMENTCAST]], 1 -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], [[MASK]] -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[CALL]], i64 [[TMP1]]) ] // CHECK-NEXT: ret void // void t0(int align) { @@ -25,10 +21,7 @@ void t0(int align) { // CHECK-NEXT: [[ALIGN_ADDR:%.*]] = alloca i32, align 4 // CHECK-NEXT: store i32 [[ALIGN:%.*]], i32* [[ALIGN_ADDR]], align 4 // CHECK-NEXT: [[CALL:%.*]] = call i8* @alloc(i32 7) -// CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i8* [[CALL]] to i64 -// CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 6 -// CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +// CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i8* [[CALL]], i64 7) ] // CHECK-NEXT: ret void // void t1(int align) { diff --git a/clang/test/OpenMP/simd_codegen.cpp b/clang/test/OpenMP/simd_codegen.cpp index cb53bb1aa38b8..3440225673c4d 100644 --- a/clang/test/OpenMP/simd_codegen.cpp +++ b/clang/test/OpenMP/simd_codegen.cpp @@ -817,25 +817,9 @@ void parallel_simd(float *a) { // TERM_DEBUG: !{{[0-9]+}} = !DILocation(line: [[@LINE-11]], // CHECK-LABEL: S8 -// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64 -// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64 -// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64 -// CHECK-DAG: ptrtoint [[SS_TY]]* %{{.+}} to i64 - -// CHECK-DAG: and i64 %{{.+}}, 15 -// CHECK-DAG: icmp eq i64 %{{.+}}, 0 // CHECK-DAG: call void @llvm.assume(i1 - -// CHECK-DAG: and i64 %{{.+}}, 7 -// CHECK-DAG: icmp eq i64 %{{.+}}, 0 // CHECK-DAG: call void @llvm.assume(i1 - -// CHECK-DAG: and i64 %{{.+}}, 15 -// CHECK-DAG: icmp eq i64 
%{{.+}}, 0 // CHECK-DAG: call void @llvm.assume(i1 - -// CHECK-DAG: and i64 %{{.+}}, 3 -// CHECK-DAG: icmp eq i64 %{{.+}}, 0 // CHECK-DAG: call void @llvm.assume(i1 struct SS { SS(): a(0) {} diff --git a/clang/test/OpenMP/simd_metadata.c b/clang/test/OpenMP/simd_metadata.c index f0ae0200dd08e..18133e3b6c2e7 100644 --- a/clang/test/OpenMP/simd_metadata.c +++ b/clang/test/OpenMP/simd_metadata.c @@ -21,30 +21,21 @@ void h1(float *c, float *a, double b[], int size) // CHECK-LABEL: define void @h1 int t = 0; #pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) -// CHECK: [[C_PTRINT:%.+]] = ptrtoint -// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31 -// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]]) -// CHECK: [[A_PTRINT:%.+]] = ptrtoint - -// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 -// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 -// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 -// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 -// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 - -// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]]) -// CHECK: [[B_PTRINT:%.+]] = ptrtoint - -// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 -// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 -// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 -// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 -// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 - -// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]]) + // CHECK: call void @llvm.assume(i1 true) [ "align"(float* [[PTR4:%.*]], {{i64|i32}} 32) ] + // 
CHECK-NEXT: load + + // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 32) ] + // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 64) ] + // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // PPC-QPX-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // CHECK-NEXT: load + + // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 16) ] + // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 32) ] + // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 64) ] + // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 16) ] + // PPC-QPX-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 32) ] for (int i = 0; i < size; ++i) { c[i] = a[i] * a[i] + b[i] * b[t]; ++t; @@ -52,30 +43,21 @@ void h1(float *c, float *a, double b[], int size) // do not emit llvm.access.group metadata due to usage of safelen clause. 
// CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group {{![0-9]+}} #pragma omp simd safelen(16) linear(t) aligned(c:32) aligned(a,b) simdlen(8) -// CHECK: [[C_PTRINT:%.+]] = ptrtoint -// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31 -// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]]) -// CHECK: [[A_PTRINT:%.+]] = ptrtoint - -// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 -// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 -// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 -// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 -// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 - -// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]]) -// CHECK: [[B_PTRINT:%.+]] = ptrtoint - -// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 -// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 -// X86-AVX512-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 -// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 -// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 - -// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]]) + // CHECK: call void @llvm.assume(i1 true) [ "align"(float* [[PTR4:%.*]], {{i64|i32}} 32) ] + // CHECK-NEXT: load + + // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 32) ] + // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 64) ] + // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] 
+ // PPC-QPX-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // CHECK-NEXT: load + + // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 16) ] + // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 32) ] + // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 64) ] + // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 16) ] + // PPC-QPX-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 32) ] for (int i = 0; i < size; ++i) { c[i] = a[i] * a[i] + b[i] * b[t]; ++t; @@ -83,30 +65,21 @@ void h1(float *c, float *a, double b[], int size) // do not emit llvm.access.group metadata due to usage of safelen clause. // CHECK-NOT: store float {{.+}}, float* {{.+}}, align {{.+}}, !llvm.access.group {{![0-9]+}} #pragma omp simd linear(t) aligned(c:32) aligned(a,b) simdlen(8) -// CHECK: [[C_PTRINT:%.+]] = ptrtoint -// CHECK-NEXT: [[C_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[C_PTRINT]], 31 -// CHECK-NEXT: [[C_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[C_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[C_MASKCOND]]) -// CHECK: [[A_PTRINT:%.+]] = ptrtoint - -// X86-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 -// X86-AVX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 31 -// X86-AVX512-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 63 -// PPC-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 -// PPC-QPX-NEXT: [[A_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[A_PTRINT]], 15 - -// CHECK-NEXT: [[A_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[A_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[A_MASKCOND]]) -// CHECK: [[B_PTRINT:%.+]] = ptrtoint - -// X86-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 -// X86-AVX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 -// X86-AVX512-NEXT: 
[[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 63 -// PPC-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 15 -// PPC-QPX-NEXT: [[B_MASKEDPTR:%.+]] = and i{{[0-9]+}} [[B_PTRINT]], 31 - -// CHECK-NEXT: [[B_MASKCOND:%.+]] = icmp eq i{{[0-9]+}} [[B_MASKEDPTR]], 0 -// CHECK-NEXT: call void @llvm.assume(i1 [[B_MASKCOND]]) + // CHECK: call void @llvm.assume(i1 true) [ "align"(float* [[PTR4:%.*]], {{i64|i32}} 32) ] + // CHECK-NEXT: load + + // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 32) ] + // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 64) ] + // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // PPC-QPX-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[PTR5:%.*]], {{i64|i32}} 16) ] + // CHECK-NEXT: load + + // X86-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 16) ] + // X86-AVX-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 32) ] + // X86-AVX512-NEXT:call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 64) ] + // PPC-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 16) ] + // PPC-QPX-NEXT: call void @llvm.assume(i1 true) [ "align"(double* [[PTR6:%.*]], {{i64|i32}} 32) ] for (int i = 0; i < size; ++i) { c[i] = a[i] * a[i] + b[i] * b[t]; ++t; diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp index 2fc166ed0b873..7192ef454d0a5 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_codegen.cpp @@ -101,10 +101,7 @@ int target_teams_fun(int *g){ // CK1: define internal void @[[OUTL1]]({{.+}}) // 
CK1: [[ARRDECAY:%.+]] = getelementptr inbounds [1000 x i32], [1000 x i32]* %{{.+}}, i{{32|64}} 0, i{{32|64}} 0 - // CK1: [[ARR_CAST:%.+]] = ptrtoint i32* [[ARRDECAY]] to i{{32|64}} - // CK1: [[MASKED_PTR:%.+]] = and i{{32|64}} [[ARR_CAST]], 7 - // CK1: [[COND:%.+]] = icmp eq i{{32|64}} [[MASKED_PTR]], 0 - // CK1: call void @llvm.assume(i1 [[COND]]) + // CK1: call void @llvm.assume(i1 true) [ "align"(i32* [[ARRDECAY]], {{i64|i32}} 8) ] // CK1: call void @__kmpc_for_static_init_4( // CK1: call void {{.+}} @__kmpc_fork_call( // CK1: call void @__kmpc_for_static_fini( diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h index ffec4ff64ca66..4552ca016bd76 100644 --- a/llvm/include/llvm/IR/IRBuilder.h +++ b/llvm/include/llvm/IR/IRBuilder.h @@ -782,7 +782,11 @@ class IRBuilderBase { /// Create an assume intrinsic call that allows the optimizer to /// assume that the provided condition will be true. - CallInst *CreateAssumption(Value *Cond); + /// + /// The optional argument \p OpBundles specifies operand bundles that are + /// added to the call instruction. + CallInst *CreateAssumption(Value *Cond, + ArrayRef OpBundles = llvm::None); /// Create a call to the experimental.gc.statepoint intrinsic to /// start a new statepoint sequence. @@ -2502,13 +2506,11 @@ class IRBuilderBase { private: /// Helper function that creates an assume intrinsic call that - /// represents an alignment assumption on the provided Ptr, Mask, Type - /// and Offset. It may be sometimes useful to do some other logic - /// based on this alignment check, thus it can be stored into 'TheCheck'. + /// represents an alignment assumption on the provided pointer \p PtrValue + /// with offset \p OffsetValue and alignment value \p AlignValue. 
CallInst *CreateAlignmentAssumptionHelper(const DataLayout &DL, - Value *PtrValue, Value *Mask, - Type *IntPtrTy, Value *OffsetValue, - Value **TheCheck); + Value *PtrValue, Value *AlignValue, + Value *OffsetValue); public: /// Create an assume intrinsic call that represents an alignment @@ -2517,13 +2519,9 @@ class IRBuilderBase { /// An optional offset can be provided, and if it is provided, the offset /// must be subtracted from the provided pointer to get the pointer with the /// specified alignment. - /// - /// It may be sometimes useful to do some other logic - /// based on this alignment check, thus it can be stored into 'TheCheck'. CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue, unsigned Alignment, - Value *OffsetValue = nullptr, - Value **TheCheck = nullptr); + Value *OffsetValue = nullptr); /// Create an assume intrinsic call that represents an alignment /// assumption on the provided pointer. @@ -2532,15 +2530,11 @@ class IRBuilderBase { /// must be subtracted from the provided pointer to get the pointer with the /// specified alignment. /// - /// It may be sometimes useful to do some other logic - /// based on this alignment check, thus it can be stored into 'TheCheck'. - /// /// This overload handles the condition where the Alignment is dependent /// on an existing value rather than a static value. 
CallInst *CreateAlignmentAssumption(const DataLayout &DL, Value *PtrValue, Value *Alignment, - Value *OffsetValue = nullptr, - Value **TheCheck = nullptr); + Value *OffsetValue = nullptr); }; /// This provides a uniform API for creating instructions and inserting diff --git a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h index be119b8ab8552..10b6e1c6a21b6 100644 --- a/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h +++ b/llvm/include/llvm/Transforms/Scalar/AlignmentFromAssumptions.h @@ -37,9 +37,9 @@ struct AlignmentFromAssumptionsPass ScalarEvolution *SE = nullptr; DominatorTree *DT = nullptr; - bool extractAlignmentInfo(CallInst *I, Value *&AAPtr, const SCEV *&AlignSCEV, - const SCEV *&OffSCEV); - bool processAssumption(CallInst *I); + bool extractAlignmentInfo(CallInst *I, unsigned Idx, Value *&AAPtr, + const SCEV *&AlignSCEV, const SCEV *&OffSCEV); + bool processAssumption(CallInst *I, unsigned Idx); }; } diff --git a/llvm/lib/Analysis/AssumeBundleQueries.cpp b/llvm/lib/Analysis/AssumeBundleQueries.cpp index e9da1e607b45b..af81216f65264 100644 --- a/llvm/lib/Analysis/AssumeBundleQueries.cpp +++ b/llvm/lib/Analysis/AssumeBundleQueries.cpp @@ -96,10 +96,17 @@ llvm::getKnowledgeFromBundle(CallInst &Assume, Result.AttrKind = Attribute::getAttrKindFromName(BOI.Tag->getKey()); if (bundleHasArgument(BOI, ABA_WasOn)) Result.WasOn = getValueFromBundleOpInfo(Assume, BOI, ABA_WasOn); + auto GetArgOr1 = [&](unsigned Idx) -> unsigned { + if (auto *ConstInt = dyn_cast( + getValueFromBundleOpInfo(Assume, BOI, ABA_Argument + Idx))) + return ConstInt->getZExtValue(); + return 1; + }; if (BOI.End - BOI.Begin > ABA_Argument) - Result.ArgValue = - cast(getValueFromBundleOpInfo(Assume, BOI, ABA_Argument)) - ->getZExtValue(); + Result.ArgValue = GetArgOr1(0); + if (Result.AttrKind == Attribute::Alignment) + if (BOI.End - BOI.Begin > ABA_Argument + 1) + Result.ArgValue = 
MinAlign(Result.ArgValue, GetArgOr1(1)); return Result; } diff --git a/llvm/lib/IR/IRBuilder.cpp b/llvm/lib/IR/IRBuilder.cpp index 1fffce015f707..b87dfe1c8df65 100644 --- a/llvm/lib/IR/IRBuilder.cpp +++ b/llvm/lib/IR/IRBuilder.cpp @@ -71,8 +71,9 @@ Value *IRBuilderBase::getCastedInt8PtrValue(Value *Ptr) { static CallInst *createCallHelper(Function *Callee, ArrayRef Ops, IRBuilderBase *Builder, const Twine &Name = "", - Instruction *FMFSource = nullptr) { - CallInst *CI = Builder->CreateCall(Callee, Ops, Name); + Instruction *FMFSource = nullptr, + ArrayRef OpBundles = {}) { + CallInst *CI = Builder->CreateCall(Callee, Ops, OpBundles, Name); if (FMFSource) CI->copyFastMathFlags(FMFSource); return CI; @@ -449,14 +450,16 @@ CallInst *IRBuilderBase::CreateInvariantStart(Value *Ptr, ConstantInt *Size) { return createCallHelper(TheFn, Ops, this); } -CallInst *IRBuilderBase::CreateAssumption(Value *Cond) { +CallInst * +IRBuilderBase::CreateAssumption(Value *Cond, + ArrayRef OpBundles) { assert(Cond->getType() == getInt1Ty() && "an assumption condition must be of type i1"); Value *Ops[] = { Cond }; Module *M = BB->getParent()->getParent(); Function *FnAssume = Intrinsic::getDeclaration(M, Intrinsic::assume); - return createCallHelper(FnAssume, Ops, this); + return createCallHelper(FnAssume, Ops, this, "", nullptr, OpBundles); } /// Create a call to a Masked Load intrinsic. 
@@ -1107,63 +1110,37 @@ Value *IRBuilderBase::CreatePreserveStructAccessIndex( return Fn; } -CallInst *IRBuilderBase::CreateAlignmentAssumptionHelper( - const DataLayout &DL, Value *PtrValue, Value *Mask, Type *IntPtrTy, - Value *OffsetValue, Value **TheCheck) { - Value *PtrIntValue = CreatePtrToInt(PtrValue, IntPtrTy, "ptrint"); - - if (OffsetValue) { - bool IsOffsetZero = false; - if (const auto *CI = dyn_cast(OffsetValue)) - IsOffsetZero = CI->isZero(); - - if (!IsOffsetZero) { - if (OffsetValue->getType() != IntPtrTy) - OffsetValue = CreateIntCast(OffsetValue, IntPtrTy, /*isSigned*/ true, - "offsetcast"); - PtrIntValue = CreateSub(PtrIntValue, OffsetValue, "offsetptr"); - } - } - - Value *Zero = ConstantInt::get(IntPtrTy, 0); - Value *MaskedPtr = CreateAnd(PtrIntValue, Mask, "maskedptr"); - Value *InvCond = CreateICmpEQ(MaskedPtr, Zero, "maskcond"); - if (TheCheck) - *TheCheck = InvCond; - - return CreateAssumption(InvCond); +CallInst *IRBuilderBase::CreateAlignmentAssumptionHelper(const DataLayout &DL, + Value *PtrValue, + Value *AlignValue, + Value *OffsetValue) { + SmallVector Vals({PtrValue, AlignValue}); + if (OffsetValue) + Vals.push_back(OffsetValue); + OperandBundleDefT AlignOpB("align", Vals); + return CreateAssumption(ConstantInt::getTrue(getContext()), {AlignOpB}); } -CallInst *IRBuilderBase::CreateAlignmentAssumption( - const DataLayout &DL, Value *PtrValue, unsigned Alignment, - Value *OffsetValue, Value **TheCheck) { +CallInst *IRBuilderBase::CreateAlignmentAssumption(const DataLayout &DL, + Value *PtrValue, + unsigned Alignment, + Value *OffsetValue) { assert(isa(PtrValue->getType()) && "trying to create an alignment assumption on a non-pointer?"); assert(Alignment != 0 && "Invalid Alignment"); auto *PtrTy = cast(PtrValue->getType()); Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace()); - - Value *Mask = ConstantInt::get(IntPtrTy, Alignment - 1); - return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy, - OffsetValue, 
TheCheck); + Value *AlignValue = ConstantInt::get(IntPtrTy, Alignment); + return CreateAlignmentAssumptionHelper(DL, PtrValue, AlignValue, OffsetValue); } -CallInst *IRBuilderBase::CreateAlignmentAssumption( - const DataLayout &DL, Value *PtrValue, Value *Alignment, - Value *OffsetValue, Value **TheCheck) { +CallInst *IRBuilderBase::CreateAlignmentAssumption(const DataLayout &DL, + Value *PtrValue, + Value *Alignment, + Value *OffsetValue) { assert(isa(PtrValue->getType()) && "trying to create an alignment assumption on a non-pointer?"); - auto *PtrTy = cast(PtrValue->getType()); - Type *IntPtrTy = getIntPtrTy(DL, PtrTy->getAddressSpace()); - - if (Alignment->getType() != IntPtrTy) - Alignment = CreateIntCast(Alignment, IntPtrTy, /*isSigned*/ false, - "alignmentcast"); - - Value *Mask = CreateSub(Alignment, ConstantInt::get(IntPtrTy, 1), "mask"); - - return CreateAlignmentAssumptionHelper(DL, PtrValue, Mask, IntPtrTy, - OffsetValue, TheCheck); + return CreateAlignmentAssumptionHelper(DL, PtrValue, Alignment, OffsetValue); } IRBuilderDefaultInserter::~IRBuilderDefaultInserter() {} diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 8fa87b7489013..3c8e73a03cc59 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4449,21 +4449,32 @@ void Verifier::visitIntrinsicCall(Intrinsic::ID ID, CallBase &Call) { Assert(Elem.Tag->getKey() == "ignore" || Attribute::isExistingAttribute(Elem.Tag->getKey()), "tags must be valid attribute names"); - Assert(Elem.End - Elem.Begin <= 2, "to many arguments"); Attribute::AttrKind Kind = Attribute::getAttrKindFromName(Elem.Tag->getKey()); + unsigned ArgCount = Elem.End - Elem.Begin; + if (Kind == Attribute::Alignment) { + Assert(ArgCount <= 3 && ArgCount >= 2, + "alignment assumptions should have 2 or 3 arguments"); + Assert(Call.getOperand(Elem.Begin)->getType()->isPointerTy(), + "first argument should be a pointer"); + Assert(Call.getOperand(Elem.Begin + 1)->getType()->isIntegerTy(), + "second 
argument should be an integer"); + if (ArgCount == 3) + Assert(Call.getOperand(Elem.Begin + 2)->getType()->isIntegerTy(), + "third argument should be an integer if present"); + return; + } + Assert(ArgCount <= 2, "to many arguments"); if (Kind == Attribute::None) break; if (Attribute::doesAttrKindHaveArgument(Kind)) { - Assert(Elem.End - Elem.Begin == 2, - "this attribute should have 2 arguments"); + Assert(ArgCount == 2, "this attribute should have 2 arguments"); Assert(isa(Call.getOperand(Elem.Begin + 1)), "the second argument should be a constant integral value"); } else if (isFuncOnlyAttr(Kind)) { - Assert((Elem.End - Elem.Begin) == 0, "this attribute has no argument"); + Assert((ArgCount) == 0, "this attribute has no argument"); } else if (!isFuncOrArgAttr(Kind)) { - Assert((Elem.End - Elem.Begin) == 1, - "this attribute should have one argument"); + Assert((ArgCount) == 1, "this attribute should have one argument"); } } break; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 836af6234ad5c..c734c9a68fb2d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -4220,11 +4220,16 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { break; case Intrinsic::assume: { Value *IIOperand = II->getArgOperand(0); + SmallVector OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + bool HasOpBundles = !OpBundles.empty(); // Remove an assume if it is followed by an identical assume. // TODO: Do we need this? Unless there are conflicting assumptions, the // computeKnownBits(IIOperand) below here eliminates redundant assumes. 
Instruction *Next = II->getNextNonDebugInstruction(); - if (match(Next, m_Intrinsic(m_Specific(IIOperand)))) + if (HasOpBundles && + match(Next, m_Intrinsic(m_Specific(IIOperand))) && + !cast(Next)->hasOperandBundles()) return eraseInstFromFunction(CI); // Canonicalize assume(a && b) -> assume(a); assume(b); @@ -4234,14 +4239,15 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { Value *AssumeIntrinsic = II->getCalledOperand(); Value *A, *B; if (match(IIOperand, m_And(m_Value(A), m_Value(B)))) { - Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, II->getName()); + Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, A, OpBundles, + II->getName()); Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, B, II->getName()); return eraseInstFromFunction(*II); } // assume(!(a || b)) -> assume(!a); assume(!b); if (match(IIOperand, m_Not(m_Or(m_Value(A), m_Value(B))))) { Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, - Builder.CreateNot(A), II->getName()); + Builder.CreateNot(A), OpBundles, II->getName()); Builder.CreateCall(AssumeIntrinsicTy, AssumeIntrinsic, Builder.CreateNot(B), II->getName()); return eraseInstFromFunction(*II); @@ -4257,7 +4263,8 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) { isValidAssumeForContext(II, LHS, &DT)) { MDNode *MD = MDNode::get(II->getContext(), None); LHS->setMetadata(LLVMContext::MD_nonnull, MD); - return eraseInstFromFunction(*II); + if (!HasOpBundles) + return eraseInstFromFunction(*II); // TODO: apply nonnull return attributes to calls and invokes // TODO: apply range metadata for range check patterns? 
diff --git a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp index 5c008585869cd..bccf94fc217fe 100644 --- a/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp +++ b/llvm/lib/Transforms/Scalar/AlignmentFromAssumptions.cpp @@ -15,6 +15,7 @@ // //===----------------------------------------------------------------------===// +#include "llvm/IR/Instructions.h" #include "llvm/InitializePasses.h" #define AA_NAME "alignment-from-assumptions" #define DEBUG_TYPE AA_NAME @@ -203,103 +204,33 @@ static Align getNewAlignment(const SCEV *AASCEV, const SCEV *AlignSCEV, } bool AlignmentFromAssumptionsPass::extractAlignmentInfo(CallInst *I, + unsigned Idx, Value *&AAPtr, const SCEV *&AlignSCEV, const SCEV *&OffSCEV) { - // An alignment assume must be a statement about the least-significant - // bits of the pointer being zero, possibly with some offset. - ICmpInst *ICI = dyn_cast(I->getArgOperand(0)); - if (!ICI) + Type *Int64Ty = Type::getInt64Ty(I->getContext()); + OperandBundleUse AlignOB = I->getOperandBundleAt(Idx); + if (AlignOB.getTagName() != "align") return false; - - // This must be an expression of the form: x & m == 0. - if (ICI->getPredicate() != ICmpInst::ICMP_EQ) - return false; - - // Swap things around so that the RHS is 0. - Value *CmpLHS = ICI->getOperand(0); - Value *CmpRHS = ICI->getOperand(1); - const SCEV *CmpLHSSCEV = SE->getSCEV(CmpLHS); - const SCEV *CmpRHSSCEV = SE->getSCEV(CmpRHS); - if (CmpLHSSCEV->isZero()) - std::swap(CmpLHS, CmpRHS); - else if (!CmpRHSSCEV->isZero()) - return false; - - BinaryOperator *CmpBO = dyn_cast(CmpLHS); - if (!CmpBO || CmpBO->getOpcode() != Instruction::And) - return false; - - // Swap things around so that the right operand of the and is a constant - // (the mask); we cannot deal with variable masks. 
- Value *AndLHS = CmpBO->getOperand(0); - Value *AndRHS = CmpBO->getOperand(1); - const SCEV *AndLHSSCEV = SE->getSCEV(AndLHS); - const SCEV *AndRHSSCEV = SE->getSCEV(AndRHS); - if (isa(AndLHSSCEV)) { - std::swap(AndLHS, AndRHS); - std::swap(AndLHSSCEV, AndRHSSCEV); - } - - const SCEVConstant *MaskSCEV = dyn_cast(AndRHSSCEV); - if (!MaskSCEV) - return false; - - // The mask must have some trailing ones (otherwise the condition is - // trivial and tells us nothing about the alignment of the left operand). - unsigned TrailingOnes = MaskSCEV->getAPInt().countTrailingOnes(); - if (!TrailingOnes) - return false; - - // Cap the alignment at the maximum with which LLVM can deal (and make sure - // we don't overflow the shift). - uint64_t Alignment; - TrailingOnes = std::min(TrailingOnes, - unsigned(sizeof(unsigned) * CHAR_BIT - 1)); - Alignment = std::min(1u << TrailingOnes, +Value::MaximumAlignment); - - Type *Int64Ty = Type::getInt64Ty(I->getParent()->getParent()->getContext()); - AlignSCEV = SE->getConstant(Int64Ty, Alignment); - - // The LHS might be a ptrtoint instruction, or it might be the pointer - // with an offset. - AAPtr = nullptr; - OffSCEV = nullptr; - if (PtrToIntInst *PToI = dyn_cast(AndLHS)) { - AAPtr = PToI->getPointerOperand(); + assert(AlignOB.Inputs.size() >= 2); + AAPtr = AlignOB.Inputs[0].get(); + // TODO: Consider accumulating the offset to the base. + AAPtr = AAPtr->stripPointerCastsSameRepresentation(); + AlignSCEV = SE->getSCEV(AlignOB.Inputs[1].get()); + AlignSCEV = SE->getTruncateOrZeroExtend(AlignSCEV, Int64Ty); + if (AlignOB.Inputs.size() == 3) + OffSCEV = SE->getSCEV(AlignOB.Inputs[2].get()); + else OffSCEV = SE->getZero(Int64Ty); - } else if (const SCEVAddExpr* AndLHSAddSCEV = - dyn_cast(AndLHSSCEV)) { - // Try to find the ptrtoint; subtract it and the rest is the offset. 
- for (SCEVAddExpr::op_iterator J = AndLHSAddSCEV->op_begin(), - JE = AndLHSAddSCEV->op_end(); J != JE; ++J) - if (const SCEVUnknown *OpUnk = dyn_cast(*J)) - if (PtrToIntInst *PToI = dyn_cast(OpUnk->getValue())) { - AAPtr = PToI->getPointerOperand(); - OffSCEV = SE->getMinusSCEV(AndLHSAddSCEV, *J); - break; - } - } - - if (!AAPtr) - return false; - - // Sign extend the offset to 64 bits (so that it is like all of the other - // expressions). - unsigned OffSCEVBits = OffSCEV->getType()->getPrimitiveSizeInBits(); - if (OffSCEVBits < 64) - OffSCEV = SE->getSignExtendExpr(OffSCEV, Int64Ty); - else if (OffSCEVBits > 64) - return false; - - AAPtr = AAPtr->stripPointerCasts(); + OffSCEV = SE->getTruncateOrZeroExtend(OffSCEV, Int64Ty); return true; } -bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) { +bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall, + unsigned Idx) { Value *AAPtr; const SCEV *AlignSCEV, *OffSCEV; - if (!extractAlignmentInfo(ACall, AAPtr, AlignSCEV, OffSCEV)) + if (!extractAlignmentInfo(ACall, Idx, AAPtr, AlignSCEV, OffSCEV)) return false; // Skip ConstantPointerNull and UndefValue. 
Assumptions on these shouldn't @@ -317,13 +248,14 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) { continue; if (Instruction *K = dyn_cast(J)) - if (isValidAssumeForContext(ACall, K, DT)) WorkList.push_back(K); } while (!WorkList.empty()) { Instruction *J = WorkList.pop_back_val(); if (LoadInst *LI = dyn_cast(J)) { + if (!isValidAssumeForContext(ACall, J, DT)) + continue; Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, LI->getPointerOperand(), SE); if (NewAlignment > LI->getAlign()) { @@ -331,6 +263,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) { ++NumLoadAlignChanged; } } else if (StoreInst *SI = dyn_cast(J)) { + if (!isValidAssumeForContext(ACall, J, DT)) + continue; Align NewAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, SI->getPointerOperand(), SE); if (NewAlignment > SI->getAlign()) { @@ -338,6 +272,8 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) { ++NumStoreAlignChanged; } } else if (MemIntrinsic *MI = dyn_cast(J)) { + if (!isValidAssumeForContext(ACall, J, DT)) + continue; Align NewDestAlignment = getNewAlignment(AASCEV, AlignSCEV, OffSCEV, MI->getDest(), SE); @@ -369,7 +305,7 @@ bool AlignmentFromAssumptionsPass::processAssumption(CallInst *ACall) { Visited.insert(J); for (User *UJ : J->users()) { Instruction *K = cast(UJ); - if (!Visited.count(K) && isValidAssumeForContext(ACall, K, DT)) + if (!Visited.count(K)) WorkList.push_back(K); } } @@ -396,8 +332,11 @@ bool AlignmentFromAssumptionsPass::runImpl(Function &F, AssumptionCache &AC, bool Changed = false; for (auto &AssumeVH : AC.assumptions()) - if (AssumeVH) - Changed |= processAssumption(cast(AssumeVH)); + if (AssumeVH) { + CallInst *Call = cast(AssumeVH); + for (unsigned Idx = 0; Idx < Call->getNumOperandBundles(); Idx++) + Changed |= processAssumption(Call, Idx); + } return Changed; } diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll 
b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll index 14e764f042c7a..610fd448c3b98 100644 --- a/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll +++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple.ll @@ -4,10 +4,7 @@ target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128" define i32 @foo(i32* nocapture %a) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32)] %0 = load i32, i32* %a, align 4 ret i32 %0 @@ -18,11 +15,7 @@ entry: define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %offsetptr = add i64 %ptrint, 24 - %maskedptr = and i64 %offsetptr, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 24)] %arrayidx = getelementptr inbounds i32, i32* %a, i64 2 %0 = load i32, i32* %arrayidx, align 4 ret i32 %0 @@ -34,11 +27,7 @@ entry: define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %offsetptr = add i64 %ptrint, 28 - %maskedptr = and i64 %offsetptr, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 28)] %arrayidx = getelementptr inbounds i32, i32* %a, i64 -1 %0 = load i32, i32* %arrayidx, align 4 ret i32 %0 @@ -50,10 +39,7 @@ entry: define i32 @goo(i32* nocapture %a) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i32 32, i32 0)] %0 = load i32, i32* %a, align 4 ret i32 %0 @@ -64,10 +50,7 @@ entry: define i32 @hoo(i32* nocapture %a) 
nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32, i32 0)] br label %for.body for.body: ; preds = %entry, %for.body @@ -98,10 +81,7 @@ for.end: ; preds = %for.body ; load(a, i0+i1+i2+32) define void @hoo2(i32* nocapture %a, i64 %id, i64 %num) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i8 32, i64 0)] %id.mul = shl nsw i64 %id, 6 %num.mul = shl nsw i64 %num, 6 br label %for0.body @@ -147,10 +127,7 @@ return: define i32 @joo(i32* nocapture %a) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i8 32, i8 0)] br label %for.body for.body: ; preds = %entry, %for.body @@ -175,16 +152,13 @@ for.end: ; preds = %for.body define i32 @koo(i32* nocapture %a) nounwind uwtable readonly { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] %r.06 = phi i32 [ 0, %entry ], [ %add, %for.body ] %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i8 32, i8 0)] %0 = load i32, i32* %arrayidx, align 4 %add = add nsw i32 %0, %r.06 %indvars.iv.next = add i64 %indvars.iv, 4 @@ -203,10 +177,7 @@ for.end: ; preds = %for.body define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly { entry: - 
%ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i128 32, i128 0)] br label %for.body for.body: ; preds = %entry, %for.body @@ -231,10 +202,7 @@ for.end: ; preds = %for.body define i32 @moo(i32* nocapture %a) nounwind uwtable { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + tail call void @llvm.assume(i1 true) ["align"(i32* %a, i16 32)] %0 = bitcast i32* %a to i8* tail call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 64, i1 false) ret i32 undef @@ -246,15 +214,9 @@ entry: define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable { entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) - %ptrint1 = ptrtoint i32* %b to i64 - %maskedptr3 = and i64 %ptrint1, 127 - %maskcond4 = icmp eq i64 %maskedptr3, 0 - tail call void @llvm.assume(i1 %maskcond4) + tail call void @llvm.assume(i1 true) ["align"(i32* %b, i32 128)] %0 = bitcast i32* %a to i8* + tail call void @llvm.assume(i1 true) ["align"(i8* %0, i16 32)] %1 = bitcast i32* %b to i8* tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 64, i1 false) ret i32 undef @@ -264,6 +226,19 @@ entry: ; CHECK: ret i32 undef } +define i32 @moo3(i32* nocapture %a, i32* nocapture %b) nounwind uwtable { +entry: + %0 = bitcast i32* %a to i8* + tail call void @llvm.assume(i1 true) ["align"(i8* %0, i16 32), "align"(i32* %b, i32 128)] + %1 = bitcast i32* %b to i8* + tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 64, i1 false) + ret i32 undef + +; CHECK-LABEL: @moo3 +; CHECK: @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 %0, i8* align 128 %1, i64 64, i1 false) +; CHECK: ret i32 undef +} + 
declare void @llvm.assume(i1) nounwind declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1) nounwind diff --git a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll index 3f0819e3641b3..453899c15c4fb 100644 --- a/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll +++ b/llvm/test/Transforms/AlignmentFromAssumptions/simple32.ll @@ -7,18 +7,12 @@ define i32 @foo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 32 ; CHECK-NEXT: ret i32 [[TMP0]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] %0 = load i32, i32* %a, align 4 ret i32 %0 @@ -28,21 +22,13 @@ define i32 @foo2(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@foo2 ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[OFFSETPTR:%.*]] = add i64 [[PTRINT]], 24 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[OFFSETPTR]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32, i64 24) ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 2 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 16 ; 
CHECK-NEXT: ret i32 [[TMP0]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %offsetptr = add i64 %ptrint, 24 - %maskedptr = and i64 %offsetptr, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32, i64 24)] %arrayidx = getelementptr inbounds i32, i32* %a, i64 2 %0 = load i32, i32* %arrayidx, align 4 ret i32 %0 @@ -53,21 +39,13 @@ define i32 @foo2a(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@foo2a ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[OFFSETPTR:%.*]] = add i64 [[PTRINT]], 28 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[OFFSETPTR]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32, i64 28) ] ; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 -1 ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 32 ; CHECK-NEXT: ret i32 [[TMP0]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %offsetptr = add i64 %ptrint, 28 - %maskedptr = and i64 %offsetptr, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32, i64 28)] %arrayidx = getelementptr inbounds i32, i32* %a, i64 -1 %0 = load i32, i32* %arrayidx, align 4 ret i32 %0 @@ -78,18 +56,12 @@ define i32 @goo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@goo ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void 
@llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[A]], align 32 ; CHECK-NEXT: ret i32 [[TMP0]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] %0 = load i32, i32* %a, align 4 ret i32 %0 @@ -99,10 +71,7 @@ define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@hoo ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -119,10 +88,7 @@ define i32 @hoo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-NEXT: ret i32 [[ADD_LCSSA]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] br label %for.body for.body: ; preds = %entry, %for.body @@ -146,10 +112,7 @@ define i32 @joo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@joo ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: 
br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 4, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -166,10 +129,7 @@ define i32 @joo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-NEXT: ret i32 [[ADD_LCSSA]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] br label %for.body for.body: ; preds = %entry, %for.body @@ -193,10 +153,7 @@ define i32 @koo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@koo ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -213,10 +170,7 @@ define i32 @koo(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-NEXT: ret i32 [[ADD_LCSSA]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] br label %for.body for.body: ; preds = %entry, %for.body @@ -240,10 +194,7 @@ define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-LABEL: define {{[^@]+}}@koo2 ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; 
CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ -4, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] @@ -260,10 +211,7 @@ define i32 @koo2(i32* nocapture %a) nounwind uwtable readonly { ; CHECK-NEXT: ret i32 [[ADD_LCSSA]] ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] br label %for.body for.body: ; preds = %entry, %for.body @@ -287,19 +235,13 @@ define i32 @moo(i32* nocapture %a) nounwind uwtable { ; CHECK-LABEL: define {{[^@]+}}@moo ; CHECK-SAME: (i32* nocapture [[A:%.*]]) #1 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: tail call void @llvm.memset.p0i8.i64(i8* align 32 [[TMP0]], i8 0, i64 64, i1 false) ; CHECK-NEXT: ret i32 undef ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] %0 = bitcast i32* %a to i8* tail call void @llvm.memset.p0i8.i64(i8* align 4 %0, i8 0, i64 64, i1 false) ret i32 undef @@ -310,28 +252,16 @@ define i32 @moo2(i32* nocapture %a, i32* nocapture %b) nounwind uwtable { ; CHECK-LABEL: define {{[^@]+}}@moo2 ; CHECK-SAME: (i32* nocapture [[A:%.*]], i32* nocapture [[B:%.*]]) #1 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint i32* 
[[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 31 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) -; CHECK-NEXT: [[PTRINT1:%.*]] = ptrtoint i32* [[B]] to i64 -; CHECK-NEXT: [[MASKEDPTR3:%.*]] = and i64 [[PTRINT1]], 127 -; CHECK-NEXT: [[MASKCOND4:%.*]] = icmp eq i64 [[MASKEDPTR3]], 0 -; CHECK-NEXT: tail call void @llvm.assume(i1 [[MASKCOND4]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[A]], i64 32) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(i32* [[B]], i64 128) ] ; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[A]] to i8* ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[B]] to i8* ; CHECK-NEXT: tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 32 [[TMP0]], i8* align 128 [[TMP1]], i64 64, i1 false) ; CHECK-NEXT: ret i32 undef ; entry: - %ptrint = ptrtoint i32* %a to i64 - %maskedptr = and i64 %ptrint, 31 - %maskcond = icmp eq i64 %maskedptr, 0 - tail call void @llvm.assume(i1 %maskcond) - %ptrint1 = ptrtoint i32* %b to i64 - %maskedptr3 = and i64 %ptrint1, 127 - %maskcond4 = icmp eq i64 %maskedptr3, 0 - tail call void @llvm.assume(i1 %maskcond4) + call void @llvm.assume(i1 true) ["align"(i32* %a, i64 32)] + call void @llvm.assume(i1 true) ["align"(i32* %b, i64 128)] %0 = bitcast i32* %a to i8* %1 = bitcast i32* %b to i8* tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 %0, i8* align 4 %1, i64 64, i1 false) diff --git a/llvm/test/Transforms/Inline/align.ll b/llvm/test/Transforms/Inline/align.ll index ede6c3fa7bcf4..f3a5184564850 100644 --- a/llvm/test/Transforms/Inline/align.ll +++ b/llvm/test/Transforms/Inline/align.ll @@ -23,10 +23,7 @@ define void @foo(float* nocapture %a, float* nocapture readonly %c) #0 { ; CHECK-LABEL: define {{[^@]+}}@foo ; CHECK-SAME: (float* nocapture [[A:%.*]], float* nocapture readonly [[C:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint float* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] 
= and i64 [[PTRINT]], 127 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[A]], i64 128) ] ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4 ; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[A]], i64 5 ; CHECK-NEXT: store float [[TMP0]], float* [[ARRAYIDX_I]], align 4 @@ -87,14 +84,8 @@ define void @foo2(float* nocapture %a, float* nocapture %b, float* nocapture rea ; CHECK-LABEL: define {{[^@]+}}@foo2 ; CHECK-SAME: (float* nocapture [[A:%.*]], float* nocapture [[B:%.*]], float* nocapture readonly [[C:%.*]]) #0 ; CHECK-NEXT: entry: -; CHECK-NEXT: [[PTRINT:%.*]] = ptrtoint float* [[A]] to i64 -; CHECK-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 127 -; CHECK-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) -; CHECK-NEXT: [[PTRINT1:%.*]] = ptrtoint float* [[B]] to i64 -; CHECK-NEXT: [[MASKEDPTR2:%.*]] = and i64 [[PTRINT1]], 127 -; CHECK-NEXT: [[MASKCOND3:%.*]] = icmp eq i64 [[MASKEDPTR2]], 0 -; CHECK-NEXT: call void @llvm.assume(i1 [[MASKCOND3]]) +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[A]], i64 128) ] +; CHECK-NEXT: call void @llvm.assume(i1 true) [ "align"(float* [[B]], i64 128) ] ; CHECK-NEXT: [[TMP0:%.*]] = load float, float* [[C]], align 4 ; CHECK-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds float, float* [[A]], i64 5 ; CHECK-NEXT: store float [[TMP0]], float* [[ARRAYIDX_I]], align 4 diff --git a/llvm/test/Transforms/InstCombine/assume.ll b/llvm/test/Transforms/InstCombine/assume.ll index 6f33e83ee3362..b372f52a2cdf0 100644 --- a/llvm/test/Transforms/InstCombine/assume.ll +++ b/llvm/test/Transforms/InstCombine/assume.ll @@ -377,6 +377,7 @@ define i32 @assumption_conflicts_with_known_bits(i32 %a, i32 %b) { define void @debug_interference(i8 %x) { ; CHECK-LABEL: @debug_interference( ; CHECK-NEXT: 
[[CMP2:%.*]] = icmp ne i8 [[X:%.*]], 0 +; CHECK-NEXT: tail call void @llvm.assume(i1 false) ; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9 ; CHECK-NEXT: tail call void @llvm.assume(i1 false) ; CHECK-NEXT: tail call void @llvm.dbg.value(metadata i32 5, metadata !7, metadata !DIExpression()), !dbg !9 diff --git a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll index 61287e35005ff..2605701d231d2 100644 --- a/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll +++ b/llvm/test/Transforms/PhaseOrdering/inlining-alignment-assumptions.ll @@ -41,10 +41,7 @@ define void @caller1(i1 %c, i64* align 1 %ptr) { ; ASSUMPTIONS-ON-NEXT: br i1 [[C:%.*]], label [[TRUE2_CRITEDGE:%.*]], label [[FALSE1:%.*]] ; ASSUMPTIONS-ON: false1: ; ASSUMPTIONS-ON-NEXT: store volatile i64 1, i64* [[PTR:%.*]], align 8 -; ASSUMPTIONS-ON-NEXT: [[PTRINT:%.*]] = ptrtoint i64* [[PTR]] to i64 -; ASSUMPTIONS-ON-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 7 -; ASSUMPTIONS-ON-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; ASSUMPTIONS-ON-NEXT: tail call void @llvm.assume(i1 [[MASKCOND]]) +; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 true) [ "align"(i64* [[PTR]], i64 8) ] ; ASSUMPTIONS-ON-NEXT: store volatile i64 0, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 @@ -54,10 +51,7 @@ define void @caller1(i1 %c, i64* align 1 %ptr) { ; ASSUMPTIONS-ON-NEXT: store volatile i64 3, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: ret void ; ASSUMPTIONS-ON: true2.critedge: -; ASSUMPTIONS-ON-NEXT: [[PTRINT_C:%.*]] = ptrtoint i64* [[PTR]] to i64 -; ASSUMPTIONS-ON-NEXT: [[MASKEDPTR_C:%.*]] = and i64 [[PTRINT_C]], 7 -; ASSUMPTIONS-ON-NEXT: [[MASKCOND_C:%.*]] = icmp eq i64 [[MASKEDPTR_C]], 0 -; ASSUMPTIONS-ON-NEXT: tail call void 
@llvm.assume(i1 [[MASKCOND_C]]) +; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 true) [ "align"(i64* [[PTR]], i64 8) ] ; ASSUMPTIONS-ON-NEXT: store volatile i64 0, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 ; ASSUMPTIONS-ON-NEXT: store volatile i64 -1, i64* [[PTR]], align 8 @@ -94,26 +88,17 @@ false2: ; This test checks that alignment assumptions do not prevent SROA. ; See PR45763. -define internal void @callee2(i64* noalias sret align 8 %arg) { +define internal void @callee2(i64* noalias sret align 32 %arg) { store i64 0, i64* %arg, align 8 ret void } define amdgpu_kernel void @caller2() { -; ASSUMPTIONS-OFF-LABEL: @caller2( -; ASSUMPTIONS-OFF-NEXT: ret void -; -; ASSUMPTIONS-ON-LABEL: @caller2( -; ASSUMPTIONS-ON-NEXT: [[ALLOCA:%.*]] = alloca i64, align 8, addrspace(5) -; ASSUMPTIONS-ON-NEXT: [[CAST:%.*]] = addrspacecast i64 addrspace(5)* [[ALLOCA]] to i64* -; ASSUMPTIONS-ON-NEXT: [[PTRINT:%.*]] = ptrtoint i64* [[CAST]] to i64 -; ASSUMPTIONS-ON-NEXT: [[MASKEDPTR:%.*]] = and i64 [[PTRINT]], 7 -; ASSUMPTIONS-ON-NEXT: [[MASKCOND:%.*]] = icmp eq i64 [[MASKEDPTR]], 0 -; ASSUMPTIONS-ON-NEXT: call void @llvm.assume(i1 [[MASKCOND]]) -; ASSUMPTIONS-ON-NEXT: ret void +; CHECK-LABEL: @caller2( +; CHECK-NEXT: ret void ; %alloca = alloca i64, align 8, addrspace(5) %cast = addrspacecast i64 addrspace(5)* %alloca to i64* - call void @callee2(i64* sret align 8 %cast) + call void @callee2(i64* sret align 32 %cast) ret void } diff --git a/llvm/test/Verifier/assume-bundles.ll b/llvm/test/Verifier/assume-bundles.ll index 302421715c797..6e260f25129ee 100644 --- a/llvm/test/Verifier/assume-bundles.ll +++ b/llvm/test/Verifier/assume-bundles.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: not opt -verify < %s 2>&1 | FileCheck %s declare void @llvm.assume(i1) @@ -6,14 +7,21 @@ define void @func(i32* %P, i32 %P1, i32* %P2, i32* %P3) { ; CHECK: tags must be valid attribute names call void 
@llvm.assume(i1 true) ["adazdazd"()] ; CHECK: the second argument should be a constant integral value - call void @llvm.assume(i1 true) ["align"(i32* %P, i32 %P1)] + call void @llvm.assume(i1 true) ["dereferenceable"(i32* %P, i32 %P1)] ; CHECK: to many arguments - call void @llvm.assume(i1 true) ["align"(i32* %P, i32 8, i32 8)] + call void @llvm.assume(i1 true) ["dereferenceable"(i32* %P, i32 8, i32 8)] ; CHECK: this attribute should have 2 arguments - call void @llvm.assume(i1 true) ["align"(i32* %P)] + call void @llvm.assume(i1 true) ["dereferenceable"(i32* %P)] ; CHECK: this attribute has no argument - call void @llvm.assume(i1 true) ["align"(i32* %P, i32 4), "cold"(i32* %P)] + call void @llvm.assume(i1 true) ["dereferenceable"(i32* %P, i32 4), "cold"(i32* %P)] ; CHECK: this attribute should have one argument call void @llvm.assume(i1 true) ["noalias"()] + call void @llvm.assume(i1 true) ["align"(i32* %P, i32 %P1, i32 4)] +; CHECK: alignment assumptions should have 2 or 3 arguments + call void @llvm.assume(i1 true) ["align"(i32* %P, i32 %P1, i32 4, i32 4)] +; CHECK: second argument should be an integer + call void @llvm.assume(i1 true) ["align"(i32* %P, i32* %P2)] +; CHECK: third argument should be an integer if present + call void @llvm.assume(i1 true) ["align"(i32* %P, i32 %P1, i32* %P2)] ret void } diff --git a/llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp b/llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp index d35a77fa379be..946368e1cb947 100644 --- a/llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp +++ b/llvm/unittests/Analysis/AssumeBundleQueriesTest.cpp @@ -546,3 +546,41 @@ TEST(AssumeQueryAPI, AssumptionCache) { ASSERT_EQ(AR[0].Index, 1u); ASSERT_EQ(AR[0].Assume, &*First); } + +TEST(AssumeQueryAPI, Alignment) { + LLVMContext C; + SMDiagnostic Err; + std::unique_ptr Mod = parseAssemblyString( + "declare void @llvm.assume(i1)\n" + "define void @test(i32* %P, i32* %P1, i32* %P2, i32 %I3, i1 %B) {\n" + "call void @llvm.assume(i1 true) 
[\"align\"(i32* %P, i32 8, i32 %I3)]\n" + "call void @llvm.assume(i1 true) [\"align\"(i32* %P1, i32 %I3, i32 " + "%I3)]\n" + "call void @llvm.assume(i1 true) [\"align\"(i32* %P2, i32 16, i32 8)]\n" + "ret void\n}\n", + Err, C); + if (!Mod) + Err.print("AssumeQueryAPI", errs()); + + Function *F = Mod->getFunction("test"); + BasicBlock::iterator Start = F->begin()->begin(); + IntrinsicInst *II; + RetainedKnowledge RK; + II = cast(&*Start); + RK = getKnowledgeFromBundle(*II, II->bundle_op_info_begin()[0]); + ASSERT_EQ(RK.AttrKind, Attribute::Alignment); + ASSERT_EQ(RK.WasOn, F->getArg(0)); + ASSERT_EQ(RK.ArgValue, 1u); + Start++; + II = cast(&*Start); + RK = getKnowledgeFromBundle(*II, II->bundle_op_info_begin()[0]); + ASSERT_EQ(RK.AttrKind, Attribute::Alignment); + ASSERT_EQ(RK.WasOn, F->getArg(1)); + ASSERT_EQ(RK.ArgValue, 1u); + Start++; + II = cast(&*Start); + RK = getKnowledgeFromBundle(*II, II->bundle_op_info_begin()[0]); + ASSERT_EQ(RK.AttrKind, Attribute::Alignment); + ASSERT_EQ(RK.WasOn, F->getArg(2)); + ASSERT_EQ(RK.ArgValue, 8u); +} From c9b625d09aeccd70bc564dfd4f00002d0156bcc2 Mon Sep 17 00:00:00 2001 From: Gui Andrade Date: Mon, 13 Jul 2020 23:17:58 +0000 Subject: [PATCH 171/771] [Sanitizers] Ensure clock_getcpuclockid interceptor <= 80 chars --- .../lib/sanitizer_common/sanitizer_common_interceptors.inc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index 4b02ad2670fef..d7e0bba762941 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -2200,7 +2200,8 @@ INTERCEPTOR(int, clock_settime, u32 clk_id, const void *tp) { #endif #if SANITIZER_INTERCEPT_CLOCK_GETCPUCLOCKID -INTERCEPTOR(int, clock_getcpuclockid, pid_t pid, __sanitizer_clockid_t *clockid) { +INTERCEPTOR(int, clock_getcpuclockid, pid_t pid, + 
__sanitizer_clockid_t *clockid) { void *ctx; COMMON_INTERCEPTOR_ENTER(ctx, clock_getcpuclockid, pid, clockid); int res = REAL(clock_getcpuclockid)(pid, clockid); From 66550c36f43b32a5be6acdc88a346ca8ac5af368 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Mon, 13 Jul 2020 16:10:45 -0700 Subject: [PATCH 172/771] [ORC] Fix typo in parameter name. --- llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h index 9f2cdf0292a73..5061c15cf4c96 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/OrcABISupport.h @@ -70,7 +70,7 @@ class OrcGenericABI { } static void writeTrampolines(char *TrampolineBlockWorkingMem, - JITTargetAddress TrampolineBlockTragetAddr, + JITTargetAddress TrampolineBlockTargetAddr, JITTargetAddress ResolverAddr, unsigned NumTrampolines) { llvm_unreachable("writeTrampolines is not supported by the generic host " From caf395ee8c28028d5af0f1455cd5ef134432124c Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 13 Jul 2020 14:12:32 -0700 Subject: [PATCH 173/771] Reapply "[llvm] Native size estimator for training -Oz inliner" This reverts commit 9908a3b9f521c954cbf6adcec35b14b2f6c8da49. The fix was to exclude the content of TFUtils.h (automatically included in the LLVM_Analysis module, when LLVM_ENABLE_MODULES is enabled). 
Differential Revision: https://reviews.llvm.org/D82817 --- llvm/CMakeLists.txt | 12 + .../Analysis/InlineSizeEstimatorAnalysis.h | 35 + llvm/include/llvm/Analysis/Utils/TFUtils.h | 138 + llvm/lib/Analysis/CMakeLists.txt | 40 +- .../Analysis/InlineSizeEstimatorAnalysis.cpp | 299 + llvm/lib/Analysis/TFUtils.cpp | 143 + llvm/lib/Passes/PassBuilder.cpp | 1 + llvm/lib/Passes/PassRegistry.def | 1 + llvm/unittests/Analysis/CMakeLists.txt | 12 +- .../InlineSizeEstimatorAnalysisTest.cpp | 101 + .../ir2native_x86_64_model/saved_model.pbtxt | 10596 ++++++++++++++++ .../variables/variables.data-00000-of-00001 | Bin 0 -> 88424 bytes .../variables/variables.index | Bin 0 -> 398 bytes llvm/unittests/Analysis/TFUtilsTest.cpp | 98 + 14 files changed, 11466 insertions(+), 10 deletions(-) create mode 100644 llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h create mode 100644 llvm/include/llvm/Analysis/Utils/TFUtils.h create mode 100644 llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp create mode 100644 llvm/lib/Analysis/TFUtils.cpp create mode 100644 llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp create mode 100644 llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt create mode 100644 llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001 create mode 100644 llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index create mode 100644 llvm/unittests/Analysis/TFUtilsTest.cpp diff --git a/llvm/CMakeLists.txt b/llvm/CMakeLists.txt index de2887b64c2a9..4e14e61fcacd6 100644 --- a/llvm/CMakeLists.txt +++ b/llvm/CMakeLists.txt @@ -981,6 +981,18 @@ if (NOT TENSORFLOW_AOT_PATH STREQUAL "") ${CMAKE_ARCHIVE_OUTPUT_DIRECTORY}/tf_runtime) endif() +set(TENSORFLOW_C_LIB_PATH "" CACHE PATH "Path to TensorFlow C library install") +find_library(tensorflow_c_api tensorflow PATHS ${TENSORFLOW_C_LIB_PATH}/lib) + +# Similar to the above Tensorflow dependency, please refer to the same script. 
+# In this case, the latest C API library is available for download from +# https://www.tensorflow.org/install/lang_c +if (tensorflow_c_api) + set(LLVM_HAVE_TF_API "ON" CACHE BOOL "Full Tensorflow API available") + add_definitions("-DLLVM_HAVE_TF_API") + include_directories(${TENSORFLOW_C_LIB_PATH}/include) +endif() + # Put this before tblgen. Else we have a circular dependence. add_subdirectory(lib/Demangle) add_subdirectory(lib/Support) diff --git a/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h new file mode 100644 index 0000000000000..29a6f59146748 --- /dev/null +++ b/llvm/include/llvm/Analysis/InlineSizeEstimatorAnalysis.h @@ -0,0 +1,35 @@ +//===- InlineSizeEstimatorAnalysis.h - ML size estimator --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// + +#ifndef LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H +#define LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H + +#include "llvm/IR/PassManager.h" + +namespace llvm { +class Function; + +class TFModelEvaluator; +class InlineSizeEstimatorAnalysis + : public AnalysisInfoMixin { +public: + InlineSizeEstimatorAnalysis(); + InlineSizeEstimatorAnalysis(InlineSizeEstimatorAnalysis &&); + ~InlineSizeEstimatorAnalysis(); + + static AnalysisKey Key; + using Result = Optional; + Result run(const Function &F, FunctionAnalysisManager &FAM); + static bool isEvaluatorRequested(); + +private: + std::unique_ptr Evaluator; +}; +} // namespace llvm +#endif // LLVM_ANALYSIS_INLINESIZEESTIMATORANALYSIS_H \ No newline at end of file diff --git a/llvm/include/llvm/Analysis/Utils/TFUtils.h b/llvm/include/llvm/Analysis/Utils/TFUtils.h new file mode 100644 index 0000000000000..b7de199753a6f --- /dev/null +++ 
b/llvm/include/llvm/Analysis/Utils/TFUtils.h @@ -0,0 +1,138 @@ +//===- TFUtils.h - utilities for tensorflow C API ---------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_ANALYSIS_UTILS_TFUTILS_H +#define LLVM_ANALYSIS_UTILS_TFUTILS_H + +#ifdef LLVM_HAVE_TF_API +#include "tensorflow/c/c_api.h" +#include "llvm/IR/LLVMContext.h" + +#include +#include + +namespace llvm { + +/// Load a SavedModel, find the given inputs and outputs, and setup storage +/// for input tensors. The user is responsible for correctly dimensioning the +/// input tensors and setting their values before calling evaluate(). +/// To initialize: +/// - construct the object +/// - initialize the input tensors using initInput. Indices must correspond to +/// indices in the InputNames used at construction. +/// To use: +/// - set input values by using getInput to get each input tensor, and then +/// setting internal scalars, for all dimensions (tensors are row-major: +/// https://github.com/tensorflow/tensorflow/blob/r1.5/tensorflow/c/c_api.h#L205) +/// - prepare an output vector of TF_Output* type, with the correct number of +/// outputs (i.e. same as OutputNames). Initialize the vector with nullptr +/// values. +/// - call evaluate. The input tensors' values are not consumed after this, and +/// may still be read. +/// - use the outputs in the output vector +/// - deallocate each output tensor in the output vector, using TF_DeleteTensor. +class TFModelEvaluator final { +public: + /// The result of a model evaluation. Handles the lifetime of the output + /// TF_Tensor objects, which means that their values need to be used before + /// the EvaluationResult's dtor is called. 
+ class EvaluationResult { + public: + ~EvaluationResult() { + for (auto *P : Output) + if (P) + TF_DeleteTensor(P); + } + + EvaluationResult(const EvaluationResult &) = delete; + EvaluationResult(EvaluationResult &&Other) + : OutputSize(Other.OutputSize), Output(std::move(Other.Output)) { + Other.Output.clear(); + }; + + /// Get a pointer to the first element of the tensor at Index. + template T *getTensorValue(size_t Index) { + return static_cast(TF_TensorData(Output[Index])); + } + + private: + friend class TFModelEvaluator; + EvaluationResult(size_t OutputSize) + : OutputSize(OutputSize), Output(OutputSize){}; + + const size_t OutputSize; + std::vector Output; + }; + + using TFGraphPtr = std::unique_ptr; + using TFSessionOptionsPtr = + std::unique_ptr; + using TFStatusPtr = std::unique_ptr; + + TFModelEvaluator(StringRef SavedModelPath, + const std::vector &InputNames, + const std::vector &OutputNames, + const char *Tags = "serve"); + ~TFModelEvaluator(); + TFModelEvaluator(const TFModelEvaluator &) = delete; + TFModelEvaluator(TFModelEvaluator &&) = delete; + + /// Evaluate the model, assuming it is valid. Returns None if the evaluation + /// fails or the model is invalid, or an EvaluationResult otherwise. The + /// inputs are assumed to have been already provided via getInput(). When + /// returning None, it also marks the object invalid. Pass an Output vector + /// with the same size as OutputNames, but with nullptr values. evaluate() + /// will populate it with tensors, matching in index the corresponding + /// OutputNames. The caller is responsible for the deallocation of those + /// tensors, using TF_DeleteTensor. + Optional evaluate(); + + /// Provides access to the input vector. It is already dimensioned correctly, + /// but the values need to be allocated by the user. + std::vector &getInput() { return Input; } + + /// Returns true if the tensorflow model was loaded successfully, false + /// otherwise. 
+ bool isValid() const { return !!Session; } + + /// Initialize the input at Index as a tensor of the given type and dimensions + void initInput(int Index, TF_DataType Type, + const std::vector &Dimensions); + +private: + /// The objects necessary for carrying out an evaluation of the SavedModel. + /// They are expensive to set up, and we maintain them accross all the + /// evaluations of the model. + TF_Session *Session = nullptr; + TFGraphPtr Graph; + TFSessionOptionsPtr Options; + + /// The specification of the input nodes. + std::vector InputFeed; + + /// The input tensors. They must match by index of the corresponding InputFeed + /// value. We set up the tensors once and just mutate theirs scalars before + /// each evaluation. The input tensors keep their value after an evaluation. + std::vector Input; + + /// The specification of the output nodes. When evaluating, the tensors in the + /// output tensor vector must match by index the corresponding element in the + /// OutputFeed. + std::vector OutputFeed; + + /// Reusable utility for deleting the session. + void deleteSession(); + + /// Reusable utility for ensuring we can bind the requested Name to a node in + /// the SavedModel Graph. 
+ bool checkReportAndReset(const TF_Output &Output, StringRef Name); +}; +} // namespace llvm + +#endif // LLVM_HAVE_TF_API +#endif // LLVM_ANALYSIS_UTILS_TFUTILS_H diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index a317579ecc836..703623396d96a 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -1,17 +1,35 @@ set(CommonMLSources MLInlineAdvisor.cpp) set(ReleaseModeMLSources ReleaseModeModelRunner.cpp) +set(DevelopmentModeMLSources TFUtils.cpp) -if (DEFINED LLVM_HAVE_TF_AOT) - include(TensorFlowCompile) - tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel) - list(APPEND ReleaseModeMLSources - $ - ${GENERATED_OBJS} - ) - set(MLPolicySources ${CommonMLSources} ${ReleaseModeMLSources}) +if (DEFINED LLVM_HAVE_TF_AOT OR DEFINED LLVM_HAVE_TF_API) + set(MLPolicySources ${CommonMLSources}) + if (DEFINED LLVM_HAVE_TF_AOT) + include(TensorFlowCompile) + tfcompile(models/inliner serve action InlinerSizeModel llvm::InlinerSizeModel) + list(APPEND ReleaseModeMLSources + $ + ${GENERATED_OBJS} + ) + LIST(APPEND MLPolicySources ${ReleaseModeMLSources}) + else() + LIST(APPEND LLVM_OPTIONAL_SOURCES ${ReleaseModeMLSources}) + endif() + + if (DEFINED LLVM_HAVE_TF_API) + LIST(APPEND MLPolicySources ${DevelopmentModeMLSources}) + LIST(APPEND MLLinkDeps ${tensorflow_c_api}) + else() + LIST(APPEND LLVM_OPTIONAL_SOURCES ${DevelopmentModeMLSources}) + endif() else() - set(LLVM_OPTIONAL_SOURCES ${CommonMLSources} ${ReleaseModeMLSources}) + LIST(APPEND LLVM_OPTIONAL_SOURCES + ${CommonMLSources} + ${DevelopmentModeMLSources} + ${ReleaseModeMLSources} + ) endif() + add_llvm_component_library(LLVMAnalysis AliasAnalysis.cpp @@ -57,6 +75,7 @@ add_llvm_component_library(LLVMAnalysis InlineCost.cpp InlineAdvisor.cpp InlineFeaturesAnalysis.cpp + InlineSizeEstimatorAnalysis.cpp InstCount.cpp InstructionPrecedenceTracking.cpp InstructionSimplify.cpp @@ -124,4 +143,7 @@ 
add_llvm_component_library(LLVMAnalysis DEPENDS intrinsics_gen + + LINK_LIBS + ${MLLinkDeps} ) diff --git a/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp new file mode 100644 index 0000000000000..1d1952ae6cbbe --- /dev/null +++ b/llvm/lib/Analysis/InlineSizeEstimatorAnalysis.cpp @@ -0,0 +1,299 @@ +//===- InlineSizeEstimatorAnalysis.cpp - IR to native size from ML model --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements feature and label extraction for offline supervised learning +// of a IR to native size model. +// +//===----------------------------------------------------------------------===// +#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" + +#ifdef LLVM_HAVE_TF_API +#include "llvm/Analysis/Utils/TFUtils.h" +#endif +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/PassManager.h" +#include "llvm/MC/MCAsmLayout.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" + +#include +#include + +using namespace llvm; + +AnalysisKey InlineSizeEstimatorAnalysis::Key; + +#define DEBUG_TYPE "inline-size-estimator" + +#ifdef LLVM_HAVE_TF_API +cl::opt TFIR2NativeModelPath( + "ml-inliner-ir2native-model", cl::Hidden, + cl::desc("Path to saved model evaluating native size from IR.")); + +namespace { +unsigned getMaxInstructionID() { +#define LAST_OTHER_INST(NR) return NR; +#include "llvm/IR/Instruction.def" +} + +class IRToNativeSizeLearning { +public: + enum class NamedFeatureIndex : size_t { + 
InitialSize, + Blocks, + Calls, + IsLocal, + IsLinkOnceODR, + IsLinkOnce, + Loops, + MaxLoopDepth, + MaxDomTreeLevel, + + NumNamedFeatures + }; + static const size_t NumNamedFeatures = + static_cast(NamedFeatureIndex::NumNamedFeatures); + struct FunctionFeatures { + static std::vector> + ImportantInstructionSuccessions; + static const size_t FeatureCount; + + std::array NamedFeatures = {0}; + std::vector InstructionHistogram; + std::vector InstructionPairHistogram; + + void fillTensor(int32_t *Ptr) const; + int32_t &operator[](NamedFeatureIndex Pos) { + return NamedFeatures[static_cast(Pos)]; + } + }; + IRToNativeSizeLearning() = default; + + static FunctionFeatures getFunctionFeatures(Function &F, + FunctionAnalysisManager &FAM); + +private: + /// Sort once the feature tuples. + struct SortFeatureTuples { + bool IsSorted = false; + SortFeatureTuples() { + std::sort(FunctionFeatures::ImportantInstructionSuccessions.begin(), + FunctionFeatures::ImportantInstructionSuccessions.end()); + IsSorted = true; + } + }; + + static llvm::ManagedStatic TupleSorter; + + static bool ensureSortedTuples() { return TupleSorter->IsSorted; } +}; +llvm::ManagedStatic + IRToNativeSizeLearning::TupleSorter; + +// This is a point in time - we determined including these pairs of +// consecutive instructions (in the IR layout available at inline time) as +// features improves the model performance. We want to move away from manual +// feature selection. 
+// The vector is given in opcode pairs rather than labels because 1) labels +// weren't readily available, and 2) the successions were hand - extracted +std::vector> + IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions = + {{1, 34}, {15, 27}, {53, 53}, {53, 34}, {1, 11}, {32, 2}, {2, 48}, + {28, 48}, {1, 45}, {49, 32}, {57, 56}, {55, 53}, {1, 28}, {57, 34}, + {1, 1}, {32, 28}, {32, 15}, {49, 28}, {53, 1}, {2, 53}, {48, 34}, + {28, 53}, {2, 32}, {1, 40}, {32, 48}, {29, 56}, {56, 32}, {55, 56}, + {48, 56}, {1, 31}, {33, 34}, {2, 28}, {1, 12}, {55, 1}, {31, 31}, + {65, 1}, {33, 56}, {32, 32}, {13, 13}, {1, 26}, {13, 26}, {2, 1}, + {1, 33}, {47, 49}, {64, 1}, {2, 38}, {34, 53}, {48, 2}, {55, 34}, + {34, 32}, {1, 5}, {56, 13}, {2, 2}, {2, 49}, {33, 2}, {49, 39}, + {56, 49}, {33, 49}, {32, 39}, {39, 57}, {29, 33}, {31, 34}, {32, 29}, + {47, 15}, {13, 34}, {2, 33}, {32, 49}, {49, 34}, {56, 33}, {1, 30}, + {33, 33}, {31, 33}, {2, 29}, {56, 7}, {32, 13}, {2, 55}, {56, 56}, + {2, 34}, {1, 42}, {34, 49}, {1, 20}, {32, 33}, {1, 25}, {53, 28}, + {1, 14}, {31, 49}, {28, 2}, {2, 13}, {2, 56}, {1, 32}, {56, 53}, + {65, 65}, {33, 53}, {64, 64}, {13, 2}, {34, 33}, {1, 4}, {49, 2}, + {1, 9}, {56, 1}, {33, 1}, {53, 57}, {32, 53}, {13, 56}, {32, 56}, + {55, 55}, {1, 18}, {49, 56}, {34, 34}, {1, 7}, {56, 64}, {32, 1}, + {13, 33}, {55, 28}, {49, 33}, {57, 57}, {56, 34}, {34, 56}, {33, 32}, + {32, 40}, {1, 29}, {53, 2}, {34, 1}, {32, 34}, {49, 49}, {1, 24}, + {40, 34}, {1, 13}, {38, 34}, {29, 2}, {34, 2}, {1, 39}, {1, 22}, + {1, 27}, {49, 1}, {1, 8}, {56, 2}}; + +// We have: 9 calculated features (the features here); 1 feature for each +// instruction opcode; and 1 feature for each manually-identified sequence. +// For the latter 2, we build a histogram: we count the number of +// occurrences of each instruction opcode or succession of instructions, +// respectively. +// Note that instruction opcodes start from 1. 
For convenience, we also have an +// always 0 feature for the '0' opcode, hence the extra 1. +const size_t IRToNativeSizeLearning::FunctionFeatures::FeatureCount = + IRToNativeSizeLearning::FunctionFeatures::ImportantInstructionSuccessions + .size() + + getMaxInstructionID() + 1 + IRToNativeSizeLearning::NumNamedFeatures; + +size_t getSize(Function &F, TargetTransformInfo &TTI) { + size_t Ret = 0; + for (auto &BB : F) + for (auto &I : BB) + Ret += TTI.getInstructionCost( + &I, TargetTransformInfo::TargetCostKind::TCK_CodeSize); + return Ret; +} + +size_t getSize(Function &F, FunctionAnalysisManager &FAM) { + auto &TTI = FAM.getResult(F); + return getSize(F, TTI); +} + +unsigned getMaxDominatorTreeDepth(const Function &F, + const DominatorTree &Tree) { + unsigned Ret = 0; + for (auto &BB : F) + if (auto *TN = Tree.getNode(&BB)) + Ret = std::max(Ret, TN->getLevel()); + return Ret; +} +} // namespace + +IRToNativeSizeLearning::FunctionFeatures +IRToNativeSizeLearning::getFunctionFeatures(Function &F, + FunctionAnalysisManager &FAM) { + assert(ensureSortedTuples() && "expected lazy initialization"); + + auto &DomTree = FAM.getResult(F); + FunctionFeatures FF; + size_t InstrCount = getMaxInstructionID() + 1; + FF.InstructionHistogram.resize(InstrCount); + + FF.InstructionPairHistogram.resize( + FunctionFeatures::ImportantInstructionSuccessions.size()); + + auto StartID = 0; + auto LastID = StartID; + auto getPairIndex = [](size_t a, size_t b) { + auto I = + std::find(FunctionFeatures::ImportantInstructionSuccessions.begin(), + FunctionFeatures::ImportantInstructionSuccessions.end(), + std::make_pair(a, b)); + if (I == FunctionFeatures::ImportantInstructionSuccessions.end()) + return -1; + return static_cast(std::distance( + FunctionFeatures::ImportantInstructionSuccessions.begin(), I)); + }; + + // We don't want debug calls, because they'd just add noise. 
+ for (auto &BB : F) { + for (auto I = BB.instructionsWithoutDebug().begin(), + E = BB.instructionsWithoutDebug().end(); + I != E; ++I) { + auto ID = I->getOpcode(); + + ++FF.InstructionHistogram[ID]; + int PairIndex = getPairIndex(LastID, ID); + if (PairIndex >= 0) + ++FF.InstructionPairHistogram[PairIndex]; + LastID = ID; + if (isa(*I)) + ++FF[NamedFeatureIndex::Calls]; + } + } + + FF[NamedFeatureIndex::InitialSize] = getSize(F, FAM); + FF[NamedFeatureIndex::IsLocal] = F.hasLocalLinkage(); + FF[NamedFeatureIndex::IsLinkOnceODR] = F.hasLinkOnceODRLinkage(); + FF[NamedFeatureIndex::IsLinkOnce] = F.hasLinkOnceLinkage(); + FF[NamedFeatureIndex::Blocks] = + std::distance(F.getBasicBlockList().begin(), F.getBasicBlockList().end()); + auto &LI = FAM.getResult(F); + FF[NamedFeatureIndex::Loops] = std::distance(LI.begin(), LI.end()); + for (auto &L : LI) + FF[NamedFeatureIndex::MaxLoopDepth] = + std::max(FF[NamedFeatureIndex::MaxLoopDepth], + static_cast(L->getLoopDepth())); + FF[NamedFeatureIndex::MaxDomTreeLevel] = getMaxDominatorTreeDepth(F, DomTree); + return FF; +} + +void IRToNativeSizeLearning::FunctionFeatures::fillTensor(int32_t *Ptr) const { + std::copy(NamedFeatures.begin(), NamedFeatures.end(), Ptr); + Ptr += NamedFeatures.size(); + std::copy(InstructionHistogram.begin(), InstructionHistogram.end(), Ptr); + Ptr += InstructionHistogram.size(); + std::copy(InstructionPairHistogram.begin(), InstructionPairHistogram.end(), + Ptr); +} + +bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { + return !TFIR2NativeModelPath.empty(); +} + +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() { + if (!isEvaluatorRequested()) { + return; + } + std::vector InputNames{"serving_default_input_1"}; + std::vector OutputName{"StatefulPartitionedCall"}; + Evaluator = std::make_unique( + TFIR2NativeModelPath.getValue().c_str(), InputNames, OutputName); + if (!Evaluator || !Evaluator->isValid()) { + Evaluator.reset(); + return; + } + static const std::vector Dim{ + 1, 
static_cast( + IRToNativeSizeLearning::FunctionFeatures::FeatureCount)}; + + Evaluator->initInput(0, TF_INT32, Dim); +} + +InlineSizeEstimatorAnalysis::Result +InlineSizeEstimatorAnalysis::run(const Function &F, + FunctionAnalysisManager &FAM) { + if (!Evaluator) + return None; + auto Features = IRToNativeSizeLearning::getFunctionFeatures( + const_cast(F), FAM); + int32_t *V = static_cast(TF_TensorData(Evaluator->getInput()[0])); + Features.fillTensor(V); + auto ER = Evaluator->evaluate(); + if (!ER) + return None; + float Ret = *ER->getTensorValue(0); + if (Ret < 0.0) + Ret = 0.0; + return static_cast(Ret); +} + +InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis( + InlineSizeEstimatorAnalysis &&Other) + : Evaluator(std::move(Other.Evaluator)) {} + +#else +namespace llvm { +class TFModelEvaluator {}; +} // namespace llvm +InlineSizeEstimatorAnalysis::InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis ::InlineSizeEstimatorAnalysis( + InlineSizeEstimatorAnalysis &&) {} +InlineSizeEstimatorAnalysis::~InlineSizeEstimatorAnalysis() {} +InlineSizeEstimatorAnalysis::Result +InlineSizeEstimatorAnalysis::run(const Function &F, + FunctionAnalysisManager &FAM) { + return None; +} +bool InlineSizeEstimatorAnalysis::isEvaluatorRequested() { return false; } +#endif \ No newline at end of file diff --git a/llvm/lib/Analysis/TFUtils.cpp b/llvm/lib/Analysis/TFUtils.cpp new file mode 100644 index 0000000000000..6cd5b5c9b4eae --- /dev/null +++ b/llvm/lib/Analysis/TFUtils.cpp @@ -0,0 +1,143 @@ +//===- TFUtils.cpp - tensorflow evaluation utilities ----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements utilities for interfacing with tensorflow C APIs. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/Utils/TFUtils.h" +#include "llvm/ADT/Twine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/raw_ostream.h" + +#include "tensorflow/c/c_api_experimental.h" + +#include + +using namespace llvm; + +namespace { + +struct TFInitializer { + TFInitializer() { + assert(!IsInitialized && "TFInitialized should be called only once"); + int Argc = 1; + const char *Name = ""; + const char **NamePtr = &Name; + TF_InitMain(Name, &Argc, const_cast(&NamePtr)); + IsInitialized = true; + } + bool IsInitialized = false; +}; + +llvm::ManagedStatic TFLibInitializer; + +bool ensureInitTF() { return TFLibInitializer->IsInitialized; } + +TFModelEvaluator::TFGraphPtr createTFGraph() { + return TFModelEvaluator::TFGraphPtr(TF_NewGraph(), &TF_DeleteGraph); +} + +TFModelEvaluator::TFStatusPtr createTFStatus() { + return TFModelEvaluator::TFStatusPtr(TF_NewStatus(), &TF_DeleteStatus); +} + +TFModelEvaluator::TFSessionOptionsPtr createTFSessionOptions() { + return TFModelEvaluator::TFSessionOptionsPtr(TF_NewSessionOptions(), + &TF_DeleteSessionOptions); +} +} // namespace + +TFModelEvaluator::TFModelEvaluator(StringRef SavedModelPath, + const std::vector &InputNames, + const std::vector &OutputNames, + const char *Tags) + : Graph(createTFGraph()), Options(createTFSessionOptions()), + InputFeed(InputNames.size()), Input(InputNames.size()), + OutputFeed(OutputNames.size()) { + if (!ensureInitTF()) { + errs() << "Tensorflow should have been initialized"; + return; + } + auto Status = createTFStatus(); + + Session = TF_LoadSessionFromSavedModel(Options.get(), nullptr, + SavedModelPath.str().c_str(), &Tags, 1, + Graph.get(), nullptr, Status.get()); + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { + errs() << TF_Message(Status.get()); + deleteSession(); + } + for (size_t I = 0; I < InputNames.size(); ++I) { + InputFeed[I] = { + 
TF_GraphOperationByName(Graph.get(), (InputNames[I]).c_str()), 0}; + if (!checkReportAndReset(InputFeed[I], InputNames[I])) + return; + } + for (size_t I = 0; I < OutputNames.size(); ++I) { + OutputFeed[I] = { + TF_GraphOperationByName(Graph.get(), (OutputNames[I]).c_str()), 0}; + if (!checkReportAndReset(OutputFeed[I], OutputNames[I])) + return; + } +} + +TFModelEvaluator::~TFModelEvaluator() { + for (auto *T : Input) { + TF_DeleteTensor(T); + } + deleteSession(); +} + +bool TFModelEvaluator::checkReportAndReset(const TF_Output &Output, + StringRef Name) { + if (Output.oper) + return true; + errs() << "Could not find TF_Output named: " + Name; + deleteSession(); + return false; +} + +void TFModelEvaluator::deleteSession() { + if (Session == nullptr) + return; + auto Status = createTFStatus(); + TF_DeleteSession(Session, Status.get()); + Session = nullptr; + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) + errs() << "Could not delete TF session"; +} + +Optional TFModelEvaluator::evaluate() { + if (!isValid()) + return None; + EvaluationResult Ret(OutputFeed.size()); + auto Status = createTFStatus(); + TF_SessionRun(Session, nullptr, InputFeed.data(), Input.data(), Input.size(), + OutputFeed.data(), Ret.Output.data(), Ret.Output.size(), + nullptr, 0, nullptr, Status.get()); + if (TF_GetCode(Status.get()) != TF_Code::TF_OK) { + errs() << TF_Message(Status.get()); + deleteSession(); + return None; + } + return Ret; +} + +void TFModelEvaluator::initInput(int Index, TF_DataType Type, + const std::vector &Dimensions) { + int64_t TotalSize = TF_DataTypeSize(Type); + for (auto &D : Dimensions) + TotalSize *= D; + + Input[Index] = + TF_AllocateTensor(Type, Dimensions.data(), Dimensions.size(), TotalSize); + std::memset(TF_TensorData(Input[Index]), 0, TotalSize); +} \ No newline at end of file diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 53158e7aabab0..537d300fee557 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ 
b/llvm/lib/Passes/PassBuilder.cpp @@ -35,6 +35,7 @@ #include "llvm/Analysis/IVUsers.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineFeaturesAnalysis.h" +#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" #include "llvm/Analysis/LazyCallGraph.h" #include "llvm/Analysis/LazyValueInfo.h" #include "llvm/Analysis/LoopAccessAnalysis.h" diff --git a/llvm/lib/Passes/PassRegistry.def b/llvm/lib/Passes/PassRegistry.def index eb2b740db5612..dfdfc3d05976a 100644 --- a/llvm/lib/Passes/PassRegistry.def +++ b/llvm/lib/Passes/PassRegistry.def @@ -133,6 +133,7 @@ FUNCTION_ANALYSIS("loops", LoopAnalysis()) FUNCTION_ANALYSIS("lazy-value-info", LazyValueAnalysis()) FUNCTION_ANALYSIS("da", DependenceAnalysis()) FUNCTION_ANALYSIS("inliner-features", InlineFeaturesAnalysis()) +FUNCTION_ANALYSIS("inliner-size-estimator", InlineSizeEstimatorAnalysis()) FUNCTION_ANALYSIS("memdep", MemoryDependenceAnalysis()) FUNCTION_ANALYSIS("memoryssa", MemorySSAAnalysis()) FUNCTION_ANALYSIS("phi-values", PhiValuesAnalysis()) diff --git a/llvm/unittests/Analysis/CMakeLists.txt b/llvm/unittests/Analysis/CMakeLists.txt index 42f7dd3c06101..59ad444d32fb4 100644 --- a/llvm/unittests/Analysis/CMakeLists.txt +++ b/llvm/unittests/Analysis/CMakeLists.txt @@ -6,7 +6,13 @@ set(LLVM_LINK_COMPONENTS TransformUtils ) -add_llvm_unittest(AnalysisTests +if (DEFINED LLVM_HAVE_TF_API) + LIST(APPEND EXTRA_TESTS TFUtilsTest.cpp) +else() + LIST(APPEND LLVM_OPTIONAL_SOURCES TFUtilsTest.cpp) +endif() + +add_llvm_unittest_with_input_files(AnalysisTests AliasAnalysisTest.cpp AliasSetTrackerTest.cpp AssumeBundleQueriesTest.cpp @@ -22,6 +28,7 @@ add_llvm_unittest(AnalysisTests DomTreeUpdaterTest.cpp GlobalsModRefTest.cpp InlineFeaturesAnalysisTest.cpp + InlineSizeEstimatorAnalysisTest.cpp IVDescriptorsTest.cpp LazyCallGraphTest.cpp LoadsTest.cpp @@ -40,4 +47,7 @@ add_llvm_unittest(AnalysisTests ValueLatticeTest.cpp ValueTrackingTest.cpp VectorUtilsTest.cpp + ${EXTRA_TESTS} ) + + 
target_link_libraries(AnalysisTests PRIVATE LLVMTestingSupport) diff --git a/llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp b/llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp new file mode 100644 index 0000000000000..377590be016ac --- /dev/null +++ b/llvm/unittests/Analysis/InlineSizeEstimatorAnalysisTest.cpp @@ -0,0 +1,101 @@ +//===- InlineSizeEstimatorAnalysisTest.cpp - test for ir2native -----------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InlineSizeEstimatorAnalysis.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/TargetLibraryInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/AsmParser/Parser.h" +#include "llvm/IR/Dominators.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Testing/Support/SupportHelpers.h" +#include "gtest/gtest.h" + +using namespace llvm; + +extern const char *TestMainArgv0; +extern cl::opt TFIR2NativeModelPath; + +#if LLVM_HAVE_TF_API +static std::string getModelPath() { + SmallString<128> InputsDir = unittest::getInputFileDirectory(TestMainArgv0); + llvm::sys::path::append(InputsDir, "ir2native_x86_64_model"); + return std::string(InputsDir); +} +#endif + +static std::unique_ptr parseIR(LLVMContext &C, const char *IR) { + SMDiagnostic Err; + std::unique_ptr Mod = parseAssemblyString(IR, Err, C); + if (!Mod) + Err.print("MLAnalysisTests", errs()); + return Mod; +} + +static FunctionAnalysisManager buildFAM() { + FunctionAnalysisManager FAM; + FAM.registerPass([&] { return DominatorTreeAnalysis(); }); + FAM.registerPass([&] { 
return PassInstrumentationAnalysis(); }); + FAM.registerPass([&] { return TargetIRAnalysis(); }); + FAM.registerPass([&] { return LoopAnalysis(); }); + return FAM; +} + +// Test model loading and evaluation. +TEST(InlineSizeEstimatorAnalysis, SizeIsValidTest) { + LLVMContext C; + std::unique_ptr M = parseIR(C, + R"IR( +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +declare i32 @f1(i32) +declare i32 @f2(i32) + +define i32 @branches(i32) { + %cond = icmp slt i32 %0, 3 + br i1 %cond, label %then, label %else + +then: + %ret.1 = call i32 @f1(i32 %0) + br label %last.block + +else: + %ret.2 = call i32 @f2(i32 %0) + br label %last.block + +last.block: + %ret = phi i32 [%ret.1, %then], [%ret.2, %else] + ret i32 %ret +} + +define internal i32 @top() { + %1 = call i32 @branches(i32 2) + %2 = call i32 @f1(i32 %1) + ret i32 %2 +} +)IR"); + + FunctionAnalysisManager FAM = buildFAM(); +#if LLVM_HAVE_TF_API + TFIR2NativeModelPath = getModelPath(); +#endif + + InlineSizeEstimatorAnalysis FA; + auto SizeEstimate = FA.run(*M->getFunction("branches"), FAM); +#if LLVM_HAVE_TF_API + EXPECT_GT(*SizeEstimate, 0); +#else + EXPECT_FALSE(SizeEstimate.hasValue()); +#endif +} diff --git a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt new file mode 100644 index 0000000000000..6efdad51083d3 --- /dev/null +++ b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/saved_model.pbtxt @@ -0,0 +1,10596 @@ +saved_model_schema_version: 1 +meta_graphs { + meta_info_def { + stripped_op_list { + op { + name: "Const" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "value" + type: "tensor" + } + attr { + name: "dtype" + type: "type" + } + } + op { + name: "NoOp" + } + op { + name: "Placeholder" + output_arg { + name: "output" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: 
"shape" + default_value { + shape { + unknown_rank: true + } + } + } + } + op { + name: "ReadVariableOp" + input_arg { + name: "resource" + type: DT_RESOURCE + } + output_arg { + name: "value" + type_attr: "dtype" + } + attr { + name: "dtype" + type: "type" + } + is_stateful: true + } + op { + name: "StatefulPartitionedCall" + input_arg { + name: "args" + type_list_attr: "Tin" + } + output_arg { + name: "output" + type_list_attr: "Tout" + } + attr { + name: "Tin" + type: "list(type)" + has_minimum: true + } + attr { + name: "Tout" + type: "list(type)" + has_minimum: true + } + attr { + name: "f" + type: "func" + } + attr { + name: "config" + type: "string" + default_value { + s: "" + } + } + attr { + name: "config_proto" + type: "string" + default_value { + s: "" + } + } + attr { + name: "executor_type" + type: "string" + default_value { + s: "" + } + } + is_stateful: true + } + op { + name: "VarHandleOp" + output_arg { + name: "resource" + type: DT_RESOURCE + } + attr { + name: "container" + type: "string" + default_value { + s: "" + } + } + attr { + name: "shared_name" + type: "string" + default_value { + s: "" + } + } + attr { + name: "dtype" + type: "type" + } + attr { + name: "shape" + type: "shape" + } + is_stateful: true + } + } + tags: "serve" + tensorflow_version: "1.15.0" + tensorflow_git_version: "unknown" + stripped_default_attrs: true + } + graph_def { + node { + name: "dense/kernel" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 214 + } + dim { + size: 100 + } + } + } + } + attr { + key: "shared_name" + value { + s: "dense/kernel" + } + } + } + node { + name: "dense/kernel/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "dense/kernel" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 214 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: 
"dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "dense/bias" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 100 + } + } + } + } + attr { + key: "shared_name" + value { + s: "dense/bias" + } + } + } + node { + name: "dense/bias/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "dense/bias" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "dense_1/kernel" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 100 + } + dim { + size: 1 + } + } + } + } + attr { + key: "shared_name" + value { + s: "dense_1/kernel" + } + } + } + node { + name: "dense_1/kernel/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "dense_1/kernel" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "dense_1/bias" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + dim { + size: 1 + } + } + } + } + attr { + key: "shared_name" + value { + s: "dense_1/bias" + } + } + } + node { + name: "dense_1/bias/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "dense_1/bias" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "total" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { 
+ shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + } + } + } + attr { + key: "shared_name" + value { + s: "total" + } + } + } + node { + name: "total/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "total" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "count" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + } + } + } + attr { + key: "shared_name" + value { + s: "count" + } + } + } + node { + name: "count/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "count" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "total_1" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + } + } + } + attr { + key: "shared_name" + value { + s: "total_1" + } + } + } + node { + name: "total_1/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "total_1" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + } + node { + name: "count_1" + op: "VarHandleOp" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + attr { + key: "shape" + value { + shape { + } + } + } + attr { + key: "shared_name" + value { + s: "count_1" + } + } + } + node { + name: "count_1/Read/ReadVariableOp" + op: "ReadVariableOp" + input: "count_1" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + 
value { + type: DT_FLOAT + } + } + } + node { + name: "NoOp" + op: "NoOp" + } + node { + name: "Const" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "\n\277\001\n\030\010\001\022\024layer_with_weights-0\n\013\010\001\022\007layer-0\n\030\010\002\022\024layer_with_weights-1\n\013\010\002\022\007layer-1\n\r\010\003\022\toptimizer\n\031\010\004\022\025regularization_losses\n\r\010\005\022\tvariables\n\027\010\006\022\023trainable_variables\n\r\010\007\022\tkeras_api\n\016\010\010\022\nsignatures\nh\n\n\010\t\022\006kernel\n\010\010\n\022\004bias\n\031\010\013\022\025regularization_losses\n\r\010\014\022\tvariables\n\027\010\r\022\023trainable_variables\n\r\010\016\022\tkeras_api\nh\n\n\010\017\022\006kernel\n\010\010\020\022\004bias\n\031\010\021\022\025regularization_losses\n\r\010\022\022\tvariables\n\027\010\023\022\023trainable_variables\n\r\010\024\022\tkeras_api\n\000\n\000\n\034\n\005\010\t\022\0010\n\005\010\n\022\0011\n\005\010\017\022\0012\n\005\010\020\022\0013\n\034\n\005\010\t\022\0010\n\005\010\n\022\0011\n\005\010\017\022\0012\n\005\010\020\022\0013\n\255\001\n\n\010\025\022\006layers\n\037\010\026\022\033layer_regularization_losses\n\033\010\027\022\027non_trainable_variables\n\021\010\030\022\rlayer_metrics\n\031\010\004\022\025regularization_losses\n\013\010\031\022\007metrics\n\r\010\005\022\tvariables\n\027\010\006\022\023trainable_variables\n\000\nX\022V\n\016VARIABLE_VALUE\022\014dense/kernel\0326layer_with_weights-0/kernel/.ATTRIBUTES/VARIABLE_VALUE\nT\022R\n\016VARIABLE_VALUE\022\ndense/bias\0324layer_with_weights-0/bias/.ATTRIBUTES/VARIABLE_VALUE\n\000\n\016\n\005\010\t\022\0010\n\005\010\n\022\0011\n\016\n\005\010\t\022\0010\n\005\010\n\022\0011\n\255\001\n\n\010\032\022\006layers\n\037\010\033\022\033layer_regulariz
ation_losses\n\033\010\034\022\027non_trainable_variables\n\021\010\035\022\rlayer_metrics\n\031\010\013\022\025regularization_losses\n\013\010\036\022\007metrics\n\r\010\014\022\tvariables\n\027\010\r\022\023trainable_variables\nZ\022X\n\016VARIABLE_VALUE\022\016dense_1/kernel\0326layer_with_weights-1/kernel/.ATTRIBUTES/VARIABLE_VALUE\nV\022T\n\016VARIABLE_VALUE\022\014dense_1/bias\0324layer_with_weights-1/bias/.ATTRIBUTES/VARIABLE_VALUE\n\000\n\016\n\005\010\017\022\0010\n\005\010\020\022\0011\n\016\n\005\010\017\022\0010\n\005\010\020\022\0011\n\255\001\n\n\010\037\022\006layers\n\037\010 \022\033layer_regularization_losses\n\033\010!\022\027non_trainable_variables\n\021\010\"\022\rlayer_metrics\n\031\010\021\022\025regularization_losses\n\013\010#\022\007metrics\n\r\010\022\022\tvariables\n\027\010\023\022\023trainable_variables\n\016\n\005\010\001\022\0010\n\005\010\002\022\0011\n\000\n\000\n\000\n\016\n\005\010$\022\0010\n\005\010%\022\0011\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n\000\n4\n\t\010&\022\005total\n\t\010\'\022\005count\n\r\010(\022\tvariables\n\r\010)\022\tkeras_api\nD\n\t\010*\022\005total\n\t\010+\022\005count\n\016\010,\022\n_fn_kwargs\n\r\010-\022\tvariables\n\r\010.\022\tkeras_api\nO\022M\n\016VARIABLE_VALUE\022\005total\0324keras_api/metrics/0/total/.ATTRIBUTES/VARIABLE_VALUE\nO\022M\n\016VARIABLE_VALUE\022\005count\0324keras_api/metrics/0/count/.ATTRIBUTES/VARIABLE_VALUE\n\016\n\005\010&\022\0010\n\005\010\'\022\0011\n\017\n\r\010(\022\tvariables\nQ\022O\n\016VARIABLE_VALUE\022\007total_1\0324keras_api/metrics/1/total/.ATTRIBUTES/VARIABLE_VALUE\nQ\022O\n\016VARIABLE_VALUE\022\007count_1\0324keras_api/metrics/1/count/.ATTRIBUTES/VARIABLE_VALUE\n\000\n\016\n\005\010*\022\0010\n\005\010+\022\0011\n\017\n\r\010-\022\tvariables" + } + } + } + } + node { + name: "serving_default_input_1" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } 
+ attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "shape" + value { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + node { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "serving_default_input_1" + input: "dense/kernel" + input: "dense/bias" + input: "dense_1/kernel" + input: "dense_1/bias" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + i: 3 + i: 4 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_signature_wrapper_6671" + } + } + } + } + node { + name: "saver_filename" + op: "Placeholder" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "shape" + value { + shape { + } + } + } + } + node { + name: "StatefulPartitionedCall_1" + op: "StatefulPartitionedCall" + input: "saver_filename" + input: "dense/kernel/Read/ReadVariableOp" + input: "dense/bias/Read/ReadVariableOp" + input: "dense_1/kernel/Read/ReadVariableOp" + input: "dense_1/bias/Read/ReadVariableOp" + input: "total/Read/ReadVariableOp" + input: "count/Read/ReadVariableOp" + input: "total_1/Read/ReadVariableOp" + input: "count_1/Read/ReadVariableOp" + input: "Const" + attr { + key: "Tin" + value { + list { + type: DT_STRING + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: 
DT_FLOAT + type: DT_STRING + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference__traced_save_6824" + } + } + } + } + node { + name: "StatefulPartitionedCall_2" + op: "StatefulPartitionedCall" + input: "saver_filename" + input: "dense/kernel" + input: "dense/bias" + input: "dense_1/kernel" + input: "dense_1/bias" + input: "total" + input: "count" + input: "total_1" + input: "count_1" + attr { + key: "Tin" + value { + list { + type: DT_STRING + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_STRING + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference__traced_restore_6860" + } + } + } + } + library { + function { + signature { + name: "__inference__traced_restore_6860" + input_arg { + name: "file_prefix" + type: DT_STRING + } + input_arg { + name: "assignvariableop_dense_kernel" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_1_dense_bias" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_2_dense_1_kernel" + type: DT_RESOURCE + } + input_arg { + name: 
"assignvariableop_3_dense_1_bias" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_4_total" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_5_count" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_6_total_1" + type: DT_RESOURCE + } + input_arg { + name: "assignvariableop_7_count_1" + type: DT_RESOURCE + } + output_arg { + name: "identity_9" + type: DT_STRING + } + is_stateful: true + control_output: "AssignVariableOp" + control_output: "AssignVariableOp_1" + control_output: "AssignVariableOp_2" + control_output: "AssignVariableOp_3" + control_output: "AssignVariableOp_4" + control_output: "AssignVariableOp_5" + control_output: "AssignVariableOp_6" + control_output: "AssignVariableOp_7" + control_output: "RestoreV2" + control_output: "RestoreV2_1" + } + node_def { + name: "RestoreV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 8 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 8 + } + } + string_val: "layer_with_weights-0/kernel/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "layer_with_weights-0/bias/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "layer_with_weights-1/kernel/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "layer_with_weights-1/bias/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "keras_api/metrics/0/total/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "keras_api/metrics/0/count/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "keras_api/metrics/1/total/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "keras_api/metrics/1/count/.ATTRIBUTES/VARIABLE_VALUE" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2/tensor_names" + } + } + node_def { + name: "RestoreV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 8 + } + } + } + } 
+ } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 8 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2/shape_and_slices" + } + } + node_def { + name: "RestoreV2" + op: "RestoreV2" + input: "file_prefix" + input: "RestoreV2/tensor_names:output:0" + input: "RestoreV2/shape_and_slices:output:0" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "RestoreV2:tensors:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + node_def { + name: "AssignVariableOp" + op: "AssignVariableOp" + input: "assignvariableop_dense_kernel" + input: "Identity:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp" + } + } + node_def { + name: "Identity_1" + op: "Identity" + input: "RestoreV2:tensors:1" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_1" + } + } + node_def { + name: "AssignVariableOp_1" + op: "AssignVariableOp" + input: "assignvariableop_1_dense_bias" + input: "Identity_1:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_1" + } + } + node_def { + name: "Identity_2" + op: "Identity" + input: "RestoreV2:tensors:2" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_2" + } + } + node_def { + name: "AssignVariableOp_2" + op: "AssignVariableOp" + input: "assignvariableop_2_dense_1_kernel" + input: "Identity_2:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_2" + } + } + node_def { + name: "Identity_3" + op: "Identity" + input: "RestoreV2:tensors:3" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_3" + } + } + node_def { + name: "AssignVariableOp_3" + op: "AssignVariableOp" + input: "assignvariableop_3_dense_1_bias" + input: "Identity_3:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_3" + } + } + node_def { + name: "Identity_4" + op: "Identity" + input: "RestoreV2:tensors:4" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape 
{ + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_4" + } + } + node_def { + name: "AssignVariableOp_4" + op: "AssignVariableOp" + input: "assignvariableop_4_total" + input: "Identity_4:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_4" + } + } + node_def { + name: "Identity_5" + op: "Identity" + input: "RestoreV2:tensors:5" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_5" + } + } + node_def { + name: "AssignVariableOp_5" + op: "AssignVariableOp" + input: "assignvariableop_5_count" + input: "Identity_5:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_5" + } + } + node_def { + name: "Identity_6" + op: "Identity" + input: "RestoreV2:tensors:6" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_6" + } + } + node_def { + name: "AssignVariableOp_6" + op: "AssignVariableOp" + input: "assignvariableop_6_total_1" + input: "Identity_6:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_6" + } + } + node_def { + name: "Identity_7" + op: "Identity" + input: "RestoreV2:tensors:7" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + 
experimental_debug_info { + original_node_names: "Identity_7" + } + } + node_def { + name: "AssignVariableOp_7" + op: "AssignVariableOp" + input: "assignvariableop_7_count_1" + input: "Identity_7:output:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "AssignVariableOp_7" + } + } + node_def { + name: "RestoreV2_1/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "_CHECKPOINTABLE_OBJECT_GRAPH" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2_1/tensor_names" + } + } + node_def { + name: "RestoreV2_1/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2_1/shape_and_slices" + } + } + node_def { + name: "RestoreV2_1" + op: "RestoreV2" + input: "file_prefix" + input: "RestoreV2_1/tensor_names:output:0" + input: "RestoreV2_1/shape_and_slices:output:0" + input: "^RestoreV2" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + unknown_rank: true + } + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_STRING + } + } + } + experimental_debug_info { + original_node_names: "RestoreV2_1" + } + } + node_def { + name: "NoOp" + op: "NoOp" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + 
experimental_debug_info { + original_node_names: "NoOp" + } + } + node_def { + name: "Identity_8" + op: "Identity" + input: "file_prefix" + input: "^AssignVariableOp" + input: "^AssignVariableOp_1" + input: "^AssignVariableOp_2" + input: "^AssignVariableOp_3" + input: "^AssignVariableOp_4" + input: "^AssignVariableOp_5" + input: "^AssignVariableOp_6" + input: "^AssignVariableOp_7" + input: "^NoOp" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_8" + } + } + node_def { + name: "Identity_9" + op: "Identity" + input: "Identity_8:output:0" + input: "^AssignVariableOp" + input: "^AssignVariableOp_1" + input: "^AssignVariableOp_2" + input: "^AssignVariableOp_3" + input: "^AssignVariableOp_4" + input: "^AssignVariableOp_5" + input: "^AssignVariableOp_6" + input: "^AssignVariableOp_7" + input: "^RestoreV2" + input: "^RestoreV2_1" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_9" + } + } + ret { + key: "identity_9" + value: "Identity_9:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "AssignVariableOp" + value: "AssignVariableOp" + } + control_ret { + key: "AssignVariableOp_1" + value: "AssignVariableOp_1" + } + control_ret { + key: "AssignVariableOp_2" + value: "AssignVariableOp_2" + } + control_ret { + key: "AssignVariableOp_3" + value: "AssignVariableOp_3" + } + control_ret { + key: "AssignVariableOp_4" + value: 
"AssignVariableOp_4" + } + control_ret { + key: "AssignVariableOp_5" + value: "AssignVariableOp_5" + } + control_ret { + key: "AssignVariableOp_6" + value: "AssignVariableOp_6" + } + control_ret { + key: "AssignVariableOp_7" + value: "AssignVariableOp_7" + } + control_ret { + key: "RestoreV2" + value: "RestoreV2" + } + control_ret { + key: "RestoreV2_1" + value: "RestoreV2_1" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "file_prefix" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_sequential_layer_call_fn_6629" + input_arg { + name: "input_1" + type: DT_INT32 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + control_output: 
"StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "input_1" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + i: 3 + i: 4 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_sequential_layer_call_and_return_conditional_losses_6618" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + 
} + } + } + attr { + key: "_user_specified_name" + value { + s: "input_1" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_sequential_layer_call_and_return_conditional_losses_6587" + input_arg { + name: "input_1" + type: DT_INT32 + } + input_arg { + name: "dense_6555" + type: DT_RESOURCE + } + input_arg { + name: "dense_6557" + type: DT_RESOURCE + } + input_arg { + name: "dense_1_6581" + type: DT_RESOURCE + } + input_arg { + name: "dense_1_6583" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + control_output: "dense/StatefulPartitionedCall" + control_output: "dense_1/StatefulPartitionedCall" + } + node_def { + name: "dense/StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "input_1" + input: "dense_6555" + input: "dense_6557" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_dense_layer_call_and_return_conditional_losses_6544" + } + 
} + } + experimental_debug_info { + original_node_names: "dense/StatefulPartitionedCall" + } + } + node_def { + name: "dense_1/StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "dense/StatefulPartitionedCall:output:0" + input: "dense_1_6581" + input: "dense_1_6583" + attr { + key: "Tin" + value { + list { + type: DT_FLOAT + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570" + } + } + } + experimental_debug_info { + original_node_names: "dense_1/StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "dense_1/StatefulPartitionedCall:output:0" + input: "^dense/StatefulPartitionedCall" + input: "^dense_1/StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "dense/StatefulPartitionedCall" + value: "dense/StatefulPartitionedCall" + } + control_ret { + key: 
"dense_1/StatefulPartitionedCall" + value: "dense_1/StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "input_1" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_sequential_layer_call_and_return_conditional_losses_6618" + input_arg { + name: "inputs" + type: DT_INT32 + } + input_arg { + name: "dense_6607" + type: DT_RESOURCE + } + input_arg { + name: "dense_6609" + type: DT_RESOURCE + } + input_arg { + name: "dense_1_6612" + type: DT_RESOURCE + } + input_arg { + name: "dense_1_6614" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + control_output: "dense/StatefulPartitionedCall" + control_output: "dense_1/StatefulPartitionedCall" + } + node_def { + name: "dense/StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "inputs" + input: "dense_6607" + input: "dense_6609" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + } + } + } + attr { + 
key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_dense_layer_call_and_return_conditional_losses_6544" + } + } + } + experimental_debug_info { + original_node_names: "dense/StatefulPartitionedCall" + } + } + node_def { + name: "dense_1/StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "dense/StatefulPartitionedCall:output:0" + input: "dense_1_6612" + input: "dense_1_6614" + attr { + key: "Tin" + value { + list { + type: DT_FLOAT + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570" + } + } + } + experimental_debug_info { + original_node_names: "dense_1/StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "dense_1/StatefulPartitionedCall:output:0" + input: "^dense/StatefulPartitionedCall" + input: "^dense_1/StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { 
+ unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "dense/StatefulPartitionedCall" + value: "dense/StatefulPartitionedCall" + } + control_ret { + key: "dense_1/StatefulPartitionedCall" + value: "dense_1/StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inputs" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_sequential_layer_call_fn_6656" + input_arg { + name: "input_1" + type: DT_INT32 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "input_1" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { 
+ shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + i: 3 + i: 4 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_sequential_layer_call_and_return_conditional_losses_6645" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "input_1" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + 
} + } + } + function { + signature { + name: "__inference_dense_1_layer_call_and_return_conditional_losses_6764" + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "biasadd_readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + } + node_def { + name: "MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "MatMul/ReadVariableOp" + } + } + node_def { + name: "MatMul" + op: "MatMul" + input: "inputs" + input: "MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "MatMul" + } + } + node_def { + name: "BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "BiasAdd/ReadVariableOp" + } + } + node_def { + name: "BiasAdd" + op: "BiasAdd" + input: "MatMul:product:0" + input: "BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "BiasAdd" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inputs" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_dense_layer_call_fn_6754" + input_arg { + name: "inputs" + type: DT_INT32 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "inputs" + input: "unknown" + input: "unknown_0" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr 
{ + key: "f" + value { + func { + name: "__inference_dense_layer_call_and_return_conditional_losses_6544" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inputs" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference__traced_save_6824" + input_arg { + name: "file_prefix" + type: DT_STRING + } + input_arg { + name: "savev2_dense_kernel_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_dense_bias_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_dense_1_kernel_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_dense_1_bias_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_total_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_count_read_readvariableop" 
+ type: DT_FLOAT + } + input_arg { + name: "savev2_total_1_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_count_1_read_readvariableop" + type: DT_FLOAT + } + input_arg { + name: "savev2_1_const" + type: DT_STRING + } + output_arg { + name: "identity_1" + type: DT_STRING + } + is_stateful: true + control_output: "MergeV2Checkpoints" + control_output: "SaveV2" + control_output: "SaveV2_1" + } + node_def { + name: "StaticRegexFullMatch" + op: "StaticRegexFullMatch" + input: "file_prefix" + device: "/device:CPU:*" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "pattern" + value { + s: "^s3://.*" + } + } + experimental_debug_info { + original_node_names: "StaticRegexFullMatch" + } + } + node_def { + name: "Const" + op: "Const" + device: "/device:CPU:*" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: ".part" + } + } + } + experimental_debug_info { + original_node_names: "Const" + } + } + node_def { + name: "Const_1" + op: "Const" + device: "/device:CPU:*" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + } + string_val: "_temp_6f1e5fef49bb4c06ace07a8a95dfbb1b/part" + } + } + } + experimental_debug_info { + original_node_names: "Const_1" + } + } + node_def { + name: "Select" + op: "Select" + input: "StaticRegexFullMatch:output:0" + input: "Const:output:0" + input: "Const_1:output:0" + device: "/device:CPU:*" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Select" + } + } + node_def { + name: "StringJoin" + op: 
"StringJoin" + input: "file_prefix" + input: "Select:output:0" + device: "/device:CPU:*" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "StringJoin" + } + } + node_def { + name: "num_shards" + op: "Const" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 2 + } + } + } + experimental_debug_info { + original_node_names: "num_shards" + } + } + node_def { + name: "ShardedFilename/shard" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 0 + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename/shard" + } + } + node_def { + name: "ShardedFilename" + op: "ShardedFilename" + input: "StringJoin:output:0" + input: "ShardedFilename/shard:output:0" + input: "num_shards:output:0" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename" + } + } + node_def { + name: "SaveV2/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 8 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 8 + } + } + string_val: "layer_with_weights-0/kernel/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "layer_with_weights-0/bias/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "layer_with_weights-1/kernel/.ATTRIBUTES/VARIABLE_VALUE" + string_val: 
"layer_with_weights-1/bias/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "keras_api/metrics/0/total/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "keras_api/metrics/0/count/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "keras_api/metrics/1/total/.ATTRIBUTES/VARIABLE_VALUE" + string_val: "keras_api/metrics/1/count/.ATTRIBUTES/VARIABLE_VALUE" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2/tensor_names" + } + } + node_def { + name: "SaveV2/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 8 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 8 + } + } + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2/shape_and_slices" + } + } + node_def { + name: "SaveV2" + op: "SaveV2" + input: "ShardedFilename:filename:0" + input: "SaveV2/tensor_names:output:0" + input: "SaveV2/shape_and_slices:output:0" + input: "savev2_dense_kernel_read_readvariableop" + input: "savev2_dense_bias_read_readvariableop" + input: "savev2_dense_1_kernel_read_readvariableop" + input: "savev2_dense_1_bias_read_readvariableop" + input: "savev2_total_read_readvariableop" + input: "savev2_count_read_readvariableop" + input: "savev2_total_1_read_readvariableop" + input: "savev2_count_1_read_readvariableop" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtypes" + value { + list { + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + type: DT_FLOAT + } + } + } + experimental_debug_info { + original_node_names: "SaveV2" + } + } + node_def { + name: "ShardedFilename_1/shard" + op: "Const" + device: "/device:CPU:0" 
+ attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "dtype" + value { + type: DT_INT32 + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_INT32 + tensor_shape { + } + int_val: 1 + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename_1/shard" + } + } + node_def { + name: "ShardedFilename_1" + op: "ShardedFilename" + input: "StringJoin:output:0" + input: "ShardedFilename_1/shard:output:0" + input: "num_shards:output:0" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "ShardedFilename_1" + } + } + node_def { + name: "SaveV2_1/tensor_names" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "_CHECKPOINTABLE_OBJECT_GRAPH" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2_1/tensor_names" + } + } + node_def { + name: "SaveV2_1/shape_and_slices" + op: "Const" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_STRING + } + } + attr { + key: "value" + value { + tensor { + dtype: DT_STRING + tensor_shape { + dim { + size: 1 + } + } + string_val: "" + } + } + } + experimental_debug_info { + original_node_names: "SaveV2_1/shape_and_slices" + } + } + node_def { + name: "SaveV2_1" + op: "SaveV2" + input: "ShardedFilename_1:filename:0" + input: "SaveV2_1/tensor_names:output:0" + input: "SaveV2_1/shape_and_slices:output:0" + input: "savev2_1_const" + input: "^SaveV2" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + attr { + key: "dtypes" + value { + 
list { + type: DT_STRING + } + } + } + experimental_debug_info { + original_node_names: "SaveV2_1" + } + } + node_def { + name: "MergeV2Checkpoints/checkpoint_prefixes" + op: "Pack" + input: "ShardedFilename:filename:0" + input: "ShardedFilename_1:filename:0" + input: "^SaveV2" + input: "^SaveV2_1" + device: "/device:CPU:0" + attr { + key: "N" + value { + i: 2 + } + } + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 2 + } + } + } + } + } + experimental_debug_info { + original_node_names: "MergeV2Checkpoints/checkpoint_prefixes" + } + } + node_def { + name: "MergeV2Checkpoints" + op: "MergeV2Checkpoints" + input: "MergeV2Checkpoints/checkpoint_prefixes:output:0" + input: "file_prefix" + input: "^SaveV2_1" + device: "/device:CPU:0" + attr { + key: "_output_shapes" + value { + list { + } + } + } + experimental_debug_info { + original_node_names: "MergeV2Checkpoints" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "file_prefix" + input: "^MergeV2Checkpoints" + device: "/device:CPU:0" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + node_def { + name: "Identity_1" + op: "Identity" + input: "Identity:output:0" + input: "^MergeV2Checkpoints" + input: "^SaveV2" + input: "^SaveV2_1" + attr { + key: "T" + value { + type: DT_STRING + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + experimental_debug_info { + original_node_names: "Identity_1" + } + } + ret { + key: "identity_1" + value: "Identity_1:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + } + shape { + dim { + size: 214 + } + dim { + size: 100 + } + } + shape { + dim { + size: 100 + } + } + shape { + dim { + size: 100 + } + dim { + size: 1 + } + } + shape { + dim { + size: 1 + } + } + shape { + } + 
shape { + } + shape { + } + shape { + } + shape { + } + } + } + } + control_ret { + key: "MergeV2Checkpoints" + value: "MergeV2Checkpoints" + } + control_ret { + key: "SaveV2" + value: "SaveV2" + } + control_ret { + key: "SaveV2_1" + value: "SaveV2_1" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "file_prefix" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 214 + } + dim { + size: 100 + } + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 1 + } + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + } + } + arg_attr { + key: 5 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 6 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 7 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 8 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 9 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_sequential_layer_call_and_return_conditional_losses_6689" + input_arg { + name: "inputs" + type: DT_INT32 + } + input_arg { + name: "dense_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "dense_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { 
+ name: "dense_1_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "dense_1_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + } + node_def { + name: "dense/Cast" + op: "Cast" + input: "inputs" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + experimental_debug_info { + original_node_names: "dense/Cast" + } + } + node_def { + name: "dense/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "dense_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 214 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "dense/MatMul/ReadVariableOp" + } + } + node_def { + name: "dense/MatMul" + op: "MatMul" + input: "dense/Cast:y:0" + input: "dense/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "dense/MatMul" + } + } + node_def { + name: "dense/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "dense_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "dense/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "dense/BiasAdd" + op: "BiasAdd" + input: "dense/MatMul:product:0" + input: "dense/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { 
+ list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "dense/BiasAdd" + } + } + node_def { + name: "dense/Relu" + op: "Relu" + input: "dense/BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "dense/Relu" + } + } + node_def { + name: "dense_1/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "dense_1_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "dense_1/MatMul/ReadVariableOp" + } + } + node_def { + name: "dense_1/MatMul" + op: "MatMul" + input: "dense/Relu:activations:0" + input: "dense_1/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "dense_1/MatMul" + } + } + node_def { + name: "dense_1/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "dense_1_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "dense_1/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "dense_1/BiasAdd" + op: "BiasAdd" + input: "dense_1/MatMul:product:0" + input: "dense_1/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + 
experimental_debug_info { + original_node_names: "dense_1/BiasAdd" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "dense_1/BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inputs" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_dense_layer_call_and_return_conditional_losses_6745" + input_arg { + name: "inputs" + type: DT_INT32 + } + input_arg { + name: "matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "biasadd_readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + } + node_def { + name: "Cast" + op: "Cast" + input: "inputs" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: 
DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Cast" + } + } + node_def { + name: "MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 214 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "MatMul/ReadVariableOp" + } + } + node_def { + name: "MatMul" + op: "MatMul" + input: "Cast:y:0" + input: "MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "MatMul" + } + } + node_def { + name: "BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "BiasAdd/ReadVariableOp" + } + } + node_def { + name: "BiasAdd" + op: "BiasAdd" + input: "MatMul:product:0" + input: "BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "BiasAdd" + } + } + node_def { + name: "Relu" + op: "Relu" + input: "BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Relu" + } + } + node_def { + name: 
"Identity" + op: "Identity" + input: "Relu:activations:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inputs" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_dense_1_layer_call_fn_6773" + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "inputs" + input: "unknown" + input: "unknown_0" + attr { + key: "Tin" + value { + list { + type: DT_FLOAT + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + } + 
} + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inputs" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference__wrapped_model_6528" + input_arg { + name: "input_1" + type: DT_INT32 + } + input_arg { + name: "sequential_dense_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "sequential_dense_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "sequential_dense_1_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: 
"sequential_dense_1_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + } + node_def { + name: "sequential/dense/Cast" + op: "Cast" + input: "input_1" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + experimental_debug_info { + original_node_names: "sequential/dense/Cast" + } + } + node_def { + name: "sequential/dense/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "sequential_dense_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 214 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "sequential/dense/MatMul/ReadVariableOp" + } + } + node_def { + name: "sequential/dense/MatMul" + op: "MatMul" + input: "sequential/dense/Cast:y:0" + input: "sequential/dense/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "sequential/dense/MatMul" + } + } + node_def { + name: "sequential/dense/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "sequential_dense_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "sequential/dense/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "sequential/dense/BiasAdd" + op: "BiasAdd" + input: "sequential/dense/MatMul:product:0" + input: "sequential/dense/BiasAdd/ReadVariableOp:value:0" + attr { + key: 
"T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "sequential/dense/BiasAdd" + } + } + node_def { + name: "sequential/dense/Relu" + op: "Relu" + input: "sequential/dense/BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "sequential/dense/Relu" + } + } + node_def { + name: "sequential/dense_1/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "sequential_dense_1_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "sequential/dense_1/MatMul/ReadVariableOp" + } + } + node_def { + name: "sequential/dense_1/MatMul" + op: "MatMul" + input: "sequential/dense/Relu:activations:0" + input: "sequential/dense_1/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "sequential/dense_1/MatMul" + } + } + node_def { + name: "sequential/dense_1/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "sequential_dense_1_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "sequential/dense_1/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "sequential/dense_1/BiasAdd" + op: "BiasAdd" + input: 
"sequential/dense_1/MatMul:product:0" + input: "sequential/dense_1/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "sequential/dense_1/BiasAdd" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "sequential/dense_1/BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "input_1" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_dense_layer_call_and_return_conditional_losses_6544" + input_arg { + name: "inputs" + type: DT_INT32 + } + input_arg { + name: "matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: 
"biasadd_readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + } + node_def { + name: "Cast" + op: "Cast" + input: "inputs" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Cast" + } + } + node_def { + name: "MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 214 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "MatMul/ReadVariableOp" + } + } + node_def { + name: "MatMul" + op: "MatMul" + input: "Cast:y:0" + input: "MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "MatMul" + } + } + node_def { + name: "BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "BiasAdd/ReadVariableOp" + } + } + node_def { + name: "BiasAdd" + op: "BiasAdd" + input: "MatMul:product:0" + input: "BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "BiasAdd" + } + } + node_def { + name: "Relu" + op: "Relu" 
+ input: "BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Relu" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "Relu:activations:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inputs" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_sequential_layer_call_and_return_conditional_losses_6601" + input_arg { + name: "input_1" + type: DT_INT32 + } + input_arg { + name: "dense_6590" + type: DT_RESOURCE + } + input_arg { + name: "dense_6592" + type: DT_RESOURCE + } + input_arg { + name: "dense_1_6595" + type: DT_RESOURCE + } + input_arg { + name: "dense_1_6597" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + control_output: "dense/StatefulPartitionedCall" + control_output: "dense_1/StatefulPartitionedCall" + } + node_def { + name: "dense/StatefulPartitionedCall" + op: "StatefulPartitionedCall" + 
input: "input_1" + input: "dense_6590" + input: "dense_6592" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_dense_layer_call_and_return_conditional_losses_6544" + } + } + } + experimental_debug_info { + original_node_names: "dense/StatefulPartitionedCall" + } + } + node_def { + name: "dense_1/StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "dense/StatefulPartitionedCall:output:0" + input: "dense_1_6595" + input: "dense_1_6597" + attr { + key: "Tin" + value { + list { + type: DT_FLOAT + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570" + } + } + } + experimental_debug_info { + original_node_names: "dense_1/StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "dense_1/StatefulPartitionedCall:output:0" + input: "^dense/StatefulPartitionedCall" + 
input: "^dense_1/StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "dense/StatefulPartitionedCall" + value: "dense/StatefulPartitionedCall" + } + control_ret { + key: "dense_1/StatefulPartitionedCall" + value: "dense_1/StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "input_1" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_sequential_layer_call_fn_6733" + input_arg { + name: "inputs" + type: DT_INT32 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + control_output: 
"StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "inputs" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + i: 3 + i: 4 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_sequential_layer_call_and_return_conditional_losses_6645" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } 
+ } + } + attr { + key: "_user_specified_name" + value { + s: "inputs" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_sequential_layer_call_and_return_conditional_losses_6645" + input_arg { + name: "inputs" + type: DT_INT32 + } + input_arg { + name: "dense_6634" + type: DT_RESOURCE + } + input_arg { + name: "dense_6636" + type: DT_RESOURCE + } + input_arg { + name: "dense_1_6639" + type: DT_RESOURCE + } + input_arg { + name: "dense_1_6641" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + control_output: "dense/StatefulPartitionedCall" + control_output: "dense_1/StatefulPartitionedCall" + } + node_def { + name: "dense/StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "inputs" + input: "dense_6634" + input: "dense_6636" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_dense_layer_call_and_return_conditional_losses_6544" + } + } + 
} + experimental_debug_info { + original_node_names: "dense/StatefulPartitionedCall" + } + } + node_def { + name: "dense_1/StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "dense/StatefulPartitionedCall:output:0" + input: "dense_1_6639" + input: "dense_1_6641" + attr { + key: "Tin" + value { + list { + type: DT_FLOAT + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570" + } + } + } + experimental_debug_info { + original_node_names: "dense_1/StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "dense_1/StatefulPartitionedCall:output:0" + input: "^dense/StatefulPartitionedCall" + input: "^dense_1/StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "dense/StatefulPartitionedCall" + value: "dense/StatefulPartitionedCall" + } + control_ret { + key: "dense_1/StatefulPartitionedCall" + 
value: "dense_1/StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inputs" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_dense_1_layer_call_and_return_conditional_losses_6570" + input_arg { + name: "inputs" + type: DT_FLOAT + } + input_arg { + name: "matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "biasadd_readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + } + node_def { + name: "MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "MatMul/ReadVariableOp" + } + } + node_def { + name: "MatMul" + op: "MatMul" + input: "inputs" + input: "MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "MatMul" + } + } + node_def { + name: "BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "biasadd_readvariableop_resource" + attr { + 
key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "BiasAdd/ReadVariableOp" + } + } + node_def { + name: "BiasAdd" + op: "BiasAdd" + input: "MatMul:product:0" + input: "BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "BiasAdd" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inputs" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_signature_wrapper_6671" + input_arg { + name: "input_1" + type: DT_INT32 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: 
DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "input_1" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + i: 3 + i: 4 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: "__inference__wrapped_model_6528" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { 
+ list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "input_1" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_sequential_layer_call_fn_6720" + input_arg { + name: "inputs" + type: DT_INT32 + } + input_arg { + name: "unknown" + type: DT_RESOURCE + } + input_arg { + name: "unknown_0" + type: DT_RESOURCE + } + input_arg { + name: "unknown_1" + type: DT_RESOURCE + } + input_arg { + name: "unknown_2" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + control_output: "StatefulPartitionedCall" + } + node_def { + name: "StatefulPartitionedCall" + op: "StatefulPartitionedCall" + input: "inputs" + input: "unknown" + input: "unknown_0" + input: "unknown_1" + input: "unknown_2" + attr { + key: "Tin" + value { + list { + type: DT_INT32 + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + type: DT_RESOURCE + } + } + } + attr { + key: "Tout" + value { + list { + type: DT_FLOAT + } + } + } + attr { + key: "_collective_manager_ids" + value { + list { + } + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "_read_only_resource_inputs" + value { + list { + i: 1 + i: 2 + i: 3 + i: 4 + } + } + } + attr { + key: "config_proto" + value { + s: "\n\007\n\003CPU\020\001\n\007\n\003GPU\020\0002\002J\0008\001" + } + } + attr { + key: "f" + value { + func { + name: 
"__inference_sequential_layer_call_and_return_conditional_losses_6618" + } + } + } + experimental_debug_info { + original_node_names: "StatefulPartitionedCall" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "StatefulPartitionedCall:output:0" + input: "^StatefulPartitionedCall" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + control_ret { + key: "StatefulPartitionedCall" + value: "StatefulPartitionedCall" + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inputs" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + function { + signature { + name: "__inference_sequential_layer_call_and_return_conditional_losses_6707" + input_arg { + name: "inputs" + type: DT_INT32 + } + input_arg { + name: "dense_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "dense_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + 
input_arg { + name: "dense_1_matmul_readvariableop_resource" + type: DT_RESOURCE + } + input_arg { + name: "dense_1_biasadd_readvariableop_resource" + type: DT_RESOURCE + } + output_arg { + name: "identity" + type: DT_FLOAT + } + is_stateful: true + } + node_def { + name: "dense/Cast" + op: "Cast" + input: "inputs" + attr { + key: "DstT" + value { + type: DT_FLOAT + } + } + attr { + key: "SrcT" + value { + type: DT_INT32 + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + experimental_debug_info { + original_node_names: "dense/Cast" + } + } + node_def { + name: "dense/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "dense_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 214 + } + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "dense/MatMul/ReadVariableOp" + } + } + node_def { + name: "dense/MatMul" + op: "MatMul" + input: "dense/Cast:y:0" + input: "dense/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "dense/MatMul" + } + } + node_def { + name: "dense/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "dense_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "dense/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "dense/BiasAdd" + op: "BiasAdd" + input: "dense/MatMul:product:0" + input: "dense/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: 
"_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "dense/BiasAdd" + } + } + node_def { + name: "dense/Relu" + op: "Relu" + input: "dense/BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + } + } + } + experimental_debug_info { + original_node_names: "dense/Relu" + } + } + node_def { + name: "dense_1/MatMul/ReadVariableOp" + op: "ReadVariableOp" + input: "dense_1_matmul_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 100 + } + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "dense_1/MatMul/ReadVariableOp" + } + } + node_def { + name: "dense_1/MatMul" + op: "MatMul" + input: "dense/Relu:activations:0" + input: "dense_1/MatMul/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "dense_1/MatMul" + } + } + node_def { + name: "dense_1/BiasAdd/ReadVariableOp" + op: "ReadVariableOp" + input: "dense_1_biasadd_readvariableop_resource" + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: 1 + } + } + } + } + } + attr { + key: "dtype" + value { + type: DT_FLOAT + } + } + experimental_debug_info { + original_node_names: "dense_1/BiasAdd/ReadVariableOp" + } + } + node_def { + name: "dense_1/BiasAdd" + op: "BiasAdd" + input: "dense_1/MatMul:product:0" + input: "dense_1/BiasAdd/ReadVariableOp:value:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } 
+ } + } + experimental_debug_info { + original_node_names: "dense_1/BiasAdd" + } + } + node_def { + name: "Identity" + op: "Identity" + input: "dense_1/BiasAdd:output:0" + attr { + key: "T" + value { + type: DT_FLOAT + } + } + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + } + experimental_debug_info { + original_node_names: "Identity" + } + } + ret { + key: "identity" + value: "Identity:output:0" + } + attr { + key: "_input_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + shape { + unknown_rank: true + } + } + } + } + arg_attr { + key: 0 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + } + attr { + key: "_user_specified_name" + value { + s: "inputs" + } + } + } + } + arg_attr { + key: 1 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 2 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 3 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + arg_attr { + key: 4 + value { + attr { + key: "_output_shapes" + value { + list { + shape { + } + } + } + } + } + } + } + } + versions { + producer: 331 + min_consumer: 12 + } + } + saver_def { + filename_tensor_name: "saver_filename:0" + save_tensor_name: "StatefulPartitionedCall_1:0" + restore_op_name: "StatefulPartitionedCall_2" + version: V2 + } + collection_def { + key: "saved_model_main_op" + value { + node_list { + value: "NoOp" + } + } + } + signature_def { + key: "__saved_model_init_op" + value { + outputs { + key: "__saved_model_init_op" + value { + name: "NoOp" + tensor_shape { + unknown_rank: true + } + } + } + } + } + signature_def { + key: 
"serving_default" + value { + inputs { + key: "input_1" + value { + name: "serving_default_input_1:0" + dtype: DT_INT32 + tensor_shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + } + } + outputs { + key: "output_1" + value { + name: "StatefulPartitionedCall:0" + dtype: DT_FLOAT + tensor_shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + } + } + method_name: "tensorflow/serving/predict" + } + } + object_graph_def { + nodes { + children { + node_id: 1 + local_name: "layer_with_weights-0" + } + children { + node_id: 1 + local_name: "layer-0" + } + children { + node_id: 2 + local_name: "layer_with_weights-1" + } + children { + node_id: 2 + local_name: "layer-1" + } + children { + node_id: 3 + local_name: "optimizer" + } + children { + node_id: 4 + local_name: "regularization_losses" + } + children { + node_id: 5 + local_name: "variables" + } + children { + node_id: 6 + local_name: "trainable_variables" + } + children { + node_id: 7 + local_name: "keras_api" + } + children { + node_id: 8 + local_name: "signatures" + } + children { + node_id: 47 + local_name: "__call__" + } + children { + node_id: 48 + local_name: "_default_save_signature" + } + children { + node_id: 49 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_sequential" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Sequential\", \"name\": \"sequential\", \"trainable\": true, \"expects_training_arg\": true, \"dtype\": \"float32\", \"batch_input_shape\": null, \"config\": {\"name\": \"sequential\", \"layers\": [{\"class_name\": \"Dense\", \"config\": {\"name\": \"dense\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 100, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, 
\"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}}, {\"class_name\": \"Dense\", \"config\": {\"name\": \"dense_1\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 1, \"activation\": \"linear\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}}], \"build_input_shape\": {\"class_name\": \"__tuple__\", \"items\": [null, 214]}}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 214}}}, \"build_input_shape\": {\"class_name\": \"__tuple__\", \"items\": [null, 214]}, \"is_graph_network\": false, \"keras_version\": \"2.2.4-tf\", \"backend\": \"tensorflow\", \"model_config\": {\"class_name\": \"Sequential\", \"config\": {\"name\": \"sequential\", \"layers\": [{\"class_name\": \"Dense\", \"config\": {\"name\": \"dense\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 100, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}}, {\"class_name\": \"Dense\", \"config\": {\"name\": \"dense_1\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 1, \"activation\": \"linear\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, 
\"kernel_constraint\": null, \"bias_constraint\": null}}], \"build_input_shape\": {\"class_name\": \"__tuple__\", \"items\": [null, 214]}}}, \"training_config\": {\"loss\": \"mean_absolute_error\", \"metrics\": [\"mean_squared_error\"], \"weighted_metrics\": null, \"loss_weights\": null, \"sample_weight_mode\": null, \"optimizer_config\": {\"class_name\": \"Adam\", \"config\": {\"name\": \"Adam\", \"learning_rate\": 0.0003000000142492354, \"decay\": 0.0, \"beta_1\": 0.8999999761581421, \"beta_2\": 0.9990000128746033, \"epsilon\": 1e-07, \"amsgrad\": false}}}}" + } + } + nodes { + children { + node_id: 9 + local_name: "kernel" + } + children { + node_id: 10 + local_name: "bias" + } + children { + node_id: 11 + local_name: "regularization_losses" + } + children { + node_id: 12 + local_name: "variables" + } + children { + node_id: 13 + local_name: "trainable_variables" + } + children { + node_id: 14 + local_name: "keras_api" + } + children { + node_id: 50 + local_name: "__call__" + } + children { + node_id: 51 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Dense\", \"name\": \"dense\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 100, \"activation\": \"relu\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 214}}}, 
\"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [null, 214]}}" + } + } + nodes { + children { + node_id: 15 + local_name: "kernel" + } + children { + node_id: 16 + local_name: "bias" + } + children { + node_id: 17 + local_name: "regularization_losses" + } + children { + node_id: 18 + local_name: "variables" + } + children { + node_id: 19 + local_name: "trainable_variables" + } + children { + node_id: 20 + local_name: "keras_api" + } + children { + node_id: 52 + local_name: "__call__" + } + children { + node_id: 53 + local_name: "call_and_return_all_conditional_losses" + } + user_object { + identifier: "_tf_keras_layer" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Dense\", \"name\": \"dense_1\", \"trainable\": true, \"expects_training_arg\": false, \"dtype\": \"float32\", \"batch_input_shape\": null, \"stateful\": false, \"config\": {\"name\": \"dense_1\", \"trainable\": true, \"dtype\": \"float32\", \"units\": 1, \"activation\": \"linear\", \"use_bias\": true, \"kernel_initializer\": {\"class_name\": \"GlorotUniform\", \"config\": {\"seed\": null}}, \"bias_initializer\": {\"class_name\": \"Zeros\", \"config\": {}}, \"kernel_regularizer\": null, \"bias_regularizer\": null, \"activity_regularizer\": null, \"kernel_constraint\": null, \"bias_constraint\": null}, \"input_spec\": {\"class_name\": \"InputSpec\", \"config\": {\"dtype\": null, \"shape\": null, \"ndim\": null, \"max_ndim\": null, \"min_ndim\": 2, \"axes\": {\"-1\": 100}}}, \"build_input_shape\": {\"class_name\": \"TensorShape\", \"items\": [null, 100]}}" + } + } + nodes { + user_object { + identifier: "optimizer" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 9 + local_name: "0" + } + children { + node_id: 10 + local_name: "1" + } + children { + node_id: 15 + local_name: "2" + } + children { 
+ node_id: 16 + local_name: "3" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 9 + local_name: "0" + } + children { + node_id: 10 + local_name: "1" + } + children { + node_id: 15 + local_name: "2" + } + children { + node_id: 16 + local_name: "3" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 21 + local_name: "layers" + } + children { + node_id: 22 + local_name: "layer_regularization_losses" + } + children { + node_id: 23 + local_name: "non_trainable_variables" + } + children { + node_id: 24 + local_name: "layer_metrics" + } + children { + node_id: 4 + local_name: "regularization_losses" + } + children { + node_id: 25 + local_name: "metrics" + } + children { + node_id: 5 + local_name: "variables" + } + children { + node_id: 6 + local_name: "trainable_variables" + } + children { + node_id: 47 + local_name: "__call__" + } + children { + node_id: 48 + local_name: "_default_save_signature" + } + children { + node_id: 49 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 49 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 54 + local_name: "serving_default" + } + user_object { + identifier: "signature_map" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 214 + } + dim { + size: 100 + } + } + trainable: true + name: "dense/kernel" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 100 + } + } + trainable: true + name: "dense/bias" + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 9 + 
local_name: "0" + } + children { + node_id: 10 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 9 + local_name: "0" + } + children { + node_id: 10 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 26 + local_name: "layers" + } + children { + node_id: 27 + local_name: "layer_regularization_losses" + } + children { + node_id: 28 + local_name: "non_trainable_variables" + } + children { + node_id: 29 + local_name: "layer_metrics" + } + children { + node_id: 11 + local_name: "regularization_losses" + } + children { + node_id: 30 + local_name: "metrics" + } + children { + node_id: 12 + local_name: "variables" + } + children { + node_id: 13 + local_name: "trainable_variables" + } + children { + node_id: 50 + local_name: "__call__" + } + children { + node_id: 51 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 51 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 100 + } + dim { + size: 1 + } + } + trainable: true + name: "dense_1/kernel" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + dim { + size: 1 + } + } + trainable: true + name: "dense_1/bias" + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 15 + local_name: "0" + } + children { + node_id: 16 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 15 + local_name: "0" + } + children { + node_id: 16 + local_name: "1" + } + user_object { + 
identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 31 + local_name: "layers" + } + children { + node_id: 32 + local_name: "layer_regularization_losses" + } + children { + node_id: 33 + local_name: "non_trainable_variables" + } + children { + node_id: 34 + local_name: "layer_metrics" + } + children { + node_id: 17 + local_name: "regularization_losses" + } + children { + node_id: 35 + local_name: "metrics" + } + children { + node_id: 18 + local_name: "variables" + } + children { + node_id: 19 + local_name: "trainable_variables" + } + children { + node_id: 52 + local_name: "__call__" + } + children { + node_id: 53 + local_name: "call_and_return_all_conditional_losses" + } + children { + node_id: 53 + local_name: "call_and_return_conditional_losses" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 1 + local_name: "0" + } + children { + node_id: 2 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 36 + local_name: "0" + } + children { + node_id: 37 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + 
user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 38 + local_name: "total" + } + children { + node_id: 39 + local_name: "count" + } + children { + node_id: 40 + local_name: "variables" + } + children { + node_id: 41 + local_name: "keras_api" + } + user_object { + identifier: "_tf_keras_metric" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"Mean\", \"name\": \"loss\", \"dtype\": \"float32\", \"config\": {\"name\": \"loss\", \"dtype\": \"float32\"}}" + } + } + nodes { + children { + node_id: 42 + local_name: "total" + } + children { + node_id: 43 + local_name: "count" + } + children { + node_id: 44 + local_name: "_fn_kwargs" + } + children { + node_id: 45 + local_name: "variables" + } + children { + node_id: 46 + local_name: "keras_api" + } + user_object { + identifier: "_tf_keras_metric" + version { + producer: 1 + min_consumer: 1 + } + metadata: "{\"class_name\": \"MeanMetricWrapper\", \"name\": \"mean_squared_error\", \"dtype\": \"float32\", \"config\": {\"name\": \"mean_squared_error\", \"dtype\": 
\"float32\", \"fn\": \"mean_squared_error\"}}" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + } + synchronization: VARIABLE_SYNCHRONIZATION_ON_READ + aggregation: VARIABLE_AGGREGATION_SUM + name: "total" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + } + synchronization: VARIABLE_SYNCHRONIZATION_ON_READ + aggregation: VARIABLE_AGGREGATION_SUM + name: "count" + } + } + nodes { + children { + node_id: 38 + local_name: "0" + } + children { + node_id: 39 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 40 + local_name: "variables" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + } + synchronization: VARIABLE_SYNCHRONIZATION_ON_READ + aggregation: VARIABLE_AGGREGATION_SUM + name: "total" + } + } + nodes { + variable { + dtype: DT_FLOAT + shape { + } + synchronization: VARIABLE_SYNCHRONIZATION_ON_READ + aggregation: VARIABLE_AGGREGATION_SUM + name: "count" + } + } + nodes { + user_object { + identifier: "trackable_dict_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 42 + local_name: "0" + } + children { + node_id: 43 + local_name: "1" + } + user_object { + identifier: "trackable_list_wrapper" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + children { + node_id: 45 + local_name: "variables" + } + user_object { + identifier: "_generic_user_object" + version { + producer: 1 + min_consumer: 1 + } + } + } + nodes { + function { + concrete_functions: "__inference_sequential_layer_call_fn_6629" + concrete_functions: "__inference_sequential_layer_call_fn_6733" + concrete_functions: "__inference_sequential_layer_call_fn_6720" + concrete_functions: "__inference_sequential_layer_call_fn_6656" + function_spec { + fullargspec { + named_tuple_value { + 
name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "training" + } + values { + string_value: "mask" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + bool_value: false + } + values { + none_value { + } + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + concrete_functions: "__inference__wrapped_model_6528" + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + } + } + } + values { + key: "varargs" + value { + string_value: "args" + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + input_signature { + tuple_value { + values { + tensor_spec_value { + name: "input_1" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + } + } + } + } + } + nodes { + function { + concrete_functions: "__inference_sequential_layer_call_and_return_conditional_losses_6689" + concrete_functions: "__inference_sequential_layer_call_and_return_conditional_losses_6587" + concrete_functions: "__inference_sequential_layer_call_and_return_conditional_losses_6707" + concrete_functions: 
"__inference_sequential_layer_call_and_return_conditional_losses_6601" + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + values { + string_value: "training" + } + values { + string_value: "mask" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + list_value { + values { + bool_value: false + } + values { + none_value { + } + } + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + dict_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + concrete_functions: "__inference_dense_layer_call_fn_6754" + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + concrete_functions: "__inference_dense_layer_call_and_return_conditional_losses_6745" + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + 
string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + concrete_functions: "__inference_dense_1_layer_call_fn_6773" + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: "annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + function { + concrete_functions: "__inference_dense_1_layer_call_and_return_conditional_losses_6764" + function_spec { + fullargspec { + named_tuple_value { + name: "FullArgSpec" + values { + key: "args" + value { + list_value { + values { + string_value: "self" + } + values { + string_value: "inputs" + } + } + } + } + values { + key: "varargs" + value { + none_value { + } + } + } + values { + key: "varkw" + value { + none_value { + } + } + } + values { + key: "defaults" + value { + none_value { + } + } + } + values { + key: "kwonlyargs" + value { + list_value { + } + } + } + values { + key: "kwonlydefaults" + value { + none_value { + } + } + } + values { + key: 
"annotations" + value { + dict_value { + } + } + } + } + } + is_method: true + input_signature { + none_value { + } + } + } + } + } + nodes { + bare_concrete_function { + concrete_function_name: "__inference_signature_wrapper_6671" + argument_keywords: "input_1" + allowed_positional_arguments: 1 + } + } + concrete_functions { + key: "__inference__wrapped_model_6528" + value { + bound_inputs: 9 + bound_inputs: 10 + bound_inputs: 15 + bound_inputs: 16 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "input_1" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + dict_value { + fields { + key: "output_1" + value { + tensor_spec_value { + name: "output_1" + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + } + } + } + } + concrete_functions { + key: "__inference_dense_1_layer_call_and_return_conditional_losses_6764" + value { + bound_inputs: 15 + bound_inputs: 16 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "inputs" + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + dtype: DT_FLOAT + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + tuple_value { + values { + tensor_spec_value { + name: "0" + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + values { + list_value { + } + } + } + } + } + } + concrete_functions { + key: "__inference_dense_1_layer_call_fn_6773" + value { + bound_inputs: 15 + bound_inputs: 16 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "inputs" + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + dtype: DT_FLOAT + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + tensor_spec_value { + 
shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + } + concrete_functions { + key: "__inference_dense_layer_call_and_return_conditional_losses_6745" + value { + bound_inputs: 9 + bound_inputs: 10 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "inputs" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + tuple_value { + values { + tensor_spec_value { + name: "0" + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + dtype: DT_FLOAT + } + } + values { + list_value { + } + } + } + } + } + } + concrete_functions { + key: "__inference_dense_layer_call_fn_6754" + value { + bound_inputs: 9 + bound_inputs: 10 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "inputs" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + tensor_spec_value { + shape { + dim { + size: -1 + } + dim { + size: 100 + } + } + dtype: DT_FLOAT + } + } + } + } + concrete_functions { + key: "__inference_sequential_layer_call_and_return_conditional_losses_6587" + value { + bound_inputs: 9 + bound_inputs: 10 + bound_inputs: 15 + bound_inputs: 16 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "input_1" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + values { + bool_value: true + } + values { + none_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + tuple_value { + values { + tensor_spec_value { + name: "0" + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + values { + list_value { + } + } + } + } + } + } + concrete_functions { + 
key: "__inference_sequential_layer_call_and_return_conditional_losses_6601" + value { + bound_inputs: 9 + bound_inputs: 10 + bound_inputs: 15 + bound_inputs: 16 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "input_1" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + values { + bool_value: false + } + values { + none_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + tuple_value { + values { + tensor_spec_value { + name: "0" + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + values { + list_value { + } + } + } + } + } + } + concrete_functions { + key: "__inference_sequential_layer_call_and_return_conditional_losses_6689" + value { + bound_inputs: 9 + bound_inputs: 10 + bound_inputs: 15 + bound_inputs: 16 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "inputs" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + values { + bool_value: true + } + values { + none_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + tuple_value { + values { + tensor_spec_value { + name: "0" + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + values { + list_value { + } + } + } + } + } + } + concrete_functions { + key: "__inference_sequential_layer_call_and_return_conditional_losses_6707" + value { + bound_inputs: 9 + bound_inputs: 10 + bound_inputs: 15 + bound_inputs: 16 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "inputs" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + values { + bool_value: false + } + values { + none_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + 
tuple_value { + values { + tensor_spec_value { + name: "0" + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + values { + list_value { + } + } + } + } + } + } + concrete_functions { + key: "__inference_sequential_layer_call_fn_6629" + value { + bound_inputs: 9 + bound_inputs: 10 + bound_inputs: 15 + bound_inputs: 16 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "input_1" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + values { + bool_value: true + } + values { + none_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + tensor_spec_value { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + } + concrete_functions { + key: "__inference_sequential_layer_call_fn_6656" + value { + bound_inputs: 9 + bound_inputs: 10 + bound_inputs: 15 + bound_inputs: 16 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "input_1" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + values { + bool_value: false + } + values { + none_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + tensor_spec_value { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + } + concrete_functions { + key: "__inference_sequential_layer_call_fn_6720" + value { + bound_inputs: 9 + bound_inputs: 10 + bound_inputs: 15 + bound_inputs: 16 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "inputs" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + values { + bool_value: true + } + values { + none_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + tensor_spec_value { + shape { + 
dim { + size: -1 + } + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + } + concrete_functions { + key: "__inference_sequential_layer_call_fn_6733" + value { + bound_inputs: 9 + bound_inputs: 10 + bound_inputs: 15 + bound_inputs: 16 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + values { + tensor_spec_value { + name: "inputs" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + values { + bool_value: false + } + values { + none_value { + } + } + } + } + values { + dict_value { + } + } + } + } + output_signature { + tensor_spec_value { + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + } + concrete_functions { + key: "__inference_signature_wrapper_6671" + value { + bound_inputs: 9 + bound_inputs: 10 + bound_inputs: 15 + bound_inputs: 16 + canonicalized_input_signature { + tuple_value { + values { + tuple_value { + } + } + values { + dict_value { + fields { + key: "input_1" + value { + tensor_spec_value { + name: "input_1" + shape { + dim { + size: -1 + } + dim { + size: 214 + } + } + dtype: DT_INT32 + } + } + } + } + } + } + } + output_signature { + dict_value { + fields { + key: "output_1" + value { + tensor_spec_value { + name: "output_1" + shape { + dim { + size: -1 + } + dim { + size: 1 + } + } + dtype: DT_FLOAT + } + } + } + } + } + } + } + } +} + diff --git a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001 b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.data-00000-of-00001 new file mode 100644 index 0000000000000000000000000000000000000000..98807d26ee9f40e99330ae6a5d2988c640a320ec GIT binary patch literal 88424 zcmWh!c{mqe7bi;yKWmgiMMaWI3*R|s5Q-F$5-o~KCA5f&60#E#rIPGRmV_*G??@_@ zN?B5tR4VOSNZMZSpYzN-&)m89nRCu(tKOK$y|sxzTm1;s6$-;cXFh^VV=}hORHKaO zW2m$^j(RsVm;%=@45{3PJEC0BeR}}jJTxBT&a8&=Wj5<^gaG6^Y1MOO#*vo;mRK zGVU4kmf!TQm54oH@%Xo76upp*vtq;X!Vbc{?(jpGm)}7=Z0i)iY{kn 
zk~N>@Xxp1{n7_>vGj;c%=a^Dl_eTpd#ytfyxi#n`X-GcKyNL1kmJ(MDca*tki>Yf5 z;>)eNsB+dBb3>mH*Z2ZlC>xK}@BYJwcMS1qxf4c3X5iili?F*d6q)KsL7;pj$#OE` ze!iJNMCt|Td{-KsTzr_Jeq*9`(~^qhUnSw`7YQquS(_4~2$Q{zk(N~+*padi=h@hz zz5ixrIQTZ%X{Ceam&>uoAsKUy=HSaY+4%j7DveFCL79LkTzW*5TYAKUvk`T`RU-4S zdP5xc?9IVL!R^F6u#iNwU&e8Ndby2(TJ(^HH~!9V<-RG3p__~$K983p|H&CJb9d`8 zZLwo($4Nbd&q^^Q{?bdZ6ljo&ABLbUri7>G>>;(v7a?zpG_^Nsfk~<7NwCW#Iy6fi zuZ`UbBQe@oVzGw3azTN6T-*SWru{H>%_GQ)Jql4N9>lOj2EV%n!@PVUENkzC>R0kO z^_Cij=j1}+ld}+EXNg%FcR)W^9S^z0FfPfrxgPTr2)#cc;fAar#Yp4*GP(m5mwp^alNh!V9ok|`0e%8mH9!TUAjaj52MKPqgFY!2Xg+%orq9(y`P4;% zfF?bf8gZM9xwQ!Y+47XLu;<8>Ug&oYhQ(jk<1R5H=3LPU=AgVfaz1y-M?W_~&;51q z)LnyLl=cNQQW}`n25F`$Qj__c@{oEgZ({1Eh~d2gag=d1CAG6}aC}}W*;T`GA9viW z74f%4g;opXPyQgsho-{X$vUh=n>lQAt>oX(=6f>HsQQ+1viRk7eQp;vFW@$n`<0|=x@h%R7 zz*9HC(Q*k$;7nY(*p&DUWl$&m3(THyQQEzWt0k+pgPFb#A0^@hp7}C3!(k!0n{3U(Xm~Pu%jf1e!9C6tzQeHi~lYXePTW? z4q3~T6a!P`?g_gK6;Qplp5FDDg?1eb_txwJlx_ASR^y*RPJ1P>tS=+=unbbd#88K+ zrWtb!!Liqvzb!hBuW7nXJhr(JtH=f5EHa(euy0|TrX#H&_O5SL|ajQg0(EeLE(Q+bQ5MPS)JA;xnTk+Lh4&V8tW4oC@ zn;NZ+O|*fZyLSuDXLsOl(K?KiRjN}vHV22V4sy3Q2Vqcc7|sYWrowyo2T{bWGIE$-#vK$*k z9}%4+G1&M+18Wk5L3?!;^gal|d0YD6_Qj?6^WA1Nketl=9*slW?Lp{MuK+K6d{GxC z;F)iJsJ$u{Q}<**=SovHENFyrHl5BEkND#+$%*_6>pZwvVTi#C9I$o44k*l&6@0GX zFpx^3VNDuyQ>y@527{rvFai&4I0&KBXFyQlV6DB-TTq)Ei1RGp!?tEM3|a7ruH6`m zhZh}!9JwI)*!~M-f9;};HiXGF4adBjfIr7-!IP}{_{gw~2yU)N?cWPfZ%!sksEmHY zvq@+(Ll{?2`VG(O%KtY> zE|5U0_tNZZFDGWZbvb>#-5K>CN8!E$CxL$zN4?+=oRdt076%KQl_1G1xR?%AJ43Ll z+k)|&Ylk8_c}!p00z&Q{VffR+tfKHyydW!#>-S`lK8yF2u9%HqB-asT z15LcN^(-0Z5svM)ir9N&v|jJXWT?3-mhY$pFN5ch@#8VnKUQQ*TXnEoVi|@x?f}sV zfQ{n<@keK0tu1L>bnqlB=o|pAEyrODX9b+jFH-Lv3X}bF1+_*tD1FBk$$U{h zIH(F&Vm@=Gb}?$-NeA<}%c+ZACY)UGhj~7D1C#>oX;Gdd7K>aYYqHl>Ka{Y-Ei2qG zZEPD{v73j@Cymg_dnsy!H$%GcJL6qtBiA7T(&Y!b~*=cYU z2Q$~gotQ{mB6AUA_g+D(>348f8i%5bL(tAH5Z4A9;EG!(FtL9J-nhMuf1U6eZGYF` zs>@eUOJyg{k2r^CH0H1-?+99s=|n}bv$*DX35GX_)y1aGMZp#&K6KR!tj82AlE}vz z?bH&z#43r`zz^wRHT- 
zIN*tVZ~D`21A4Bt#BdQ?s(i8+MGk4=`Q__SM~}t$M8I9)%hBjZGxj&~VEeoX%f3Iy zaHSZkrgIW!KhHyN^}YDLxB_P`{)8DT+iO)Wsq#|g`|-!`$Mlzw6Fw3XVVnG|aFK^E zmWBi~Dp|%

+d4W!E+~y333|Hr9)dThX)0R^VOV zhg|1I4Vv(22j}KJM3#0-veVqQ({Gg}Bx$o4tKP=&4pD7Hf5I42c@^m3H@Rp)>c5p`c_-SGUWsgV7IPfSGVrlB%Mxn71pTYCj`DxLwAktB)Z z`^kFe`Pe9Whdi70yjFCkE4d)z!vr3zBf?!@P*g+~&y*bi@$0GlDz95m5U$F5@2Dfj zJCpg+wHzKf_7|oHtI{og_lRw-8l6xU3wG1287KETM(oXFTB`J(Vd_0WspdQw>ukk) zn%XijOc?gw1giam0Q+oeA1(iqR){v{@8Qe-y*f z(0lah@FGUDXC5xXJT0o=_H|RV$Yedg>CW zt4*RG`}0uWd=LD#>V@>;>C6kkFqzZ6jog#hW6ur}Qt~N*v`v0T4=PD8VPon@*^9gM zh+PrWWTVZOu4(41L_?^>+|j-3j-Wn2PjN2uD+I0kWI?HM7B~%_V>}Y_!Cy9#Jp22C zG+VbZN*7y*Q#22y|6L_b4T`{2Y7!5{LRjw=3+CH*l0ytf>|#V|*4S~d@<1c;%**BS z^~9;BvJ?&&WWl z9-rf?e$ON(-DYIK(3}~+=Lk`^r~#ihKfv%yJ19=dBzbzLIR9Q1Qr~fhG&H3{knb=P z)Rb&oDz=Wv7#jpl8-7DwffObM<-^TgR>Wjz5rYZIz|2T z^Dsy*Ng&%7CXmFfUZie8t6*i&9@0Oym*f?nVisTj2hEEr$eO1@B*69qq#NbHdb^`A z^Th@(D0vlQp7sDbCM1ESR|U9=C@|(?gzJK9U{`Ac%>n_*6dJ^TuXEAia4;Uy=F#g~ zBxt|1L;F9cahq5winL!tqpBMidRd%37`OrDL$9EmU~~rFl!j|w*ODQhZru6fD84g2 zL{kkOVCvhg#CvK14*U*fyEbm-^)-BOiTN2kqQqhB1}B93VRWIk6XJ?{_;j8WUAQ)g zDwjM%?M;IymUxk?>QbUh4uqmxM>87O+`|jq?r6qeMwhg|T>sS?v{hEcvyb0m@F*^< zd2$f#YZ7qLqtMfRBZ};6C1)>>XFqID!OgG$)l(!286>;qiy(ZVHSP1vbDz3LWnJj+C#ry?Il|t1aRtQ z5g=WBr?xtAFI?6%VOm~}L2Iu9*ioKIcI|jejvtVN&ZUVYBlRJ-x%xbjEM7*J^^}rz zy`PBq!&UT-FNLde1F-L#7vu)7;uQ@=FunX6aOXb?)QoqMxlId+)a^^Kpw*OY@;?aw zO4M<3=4td$Gb4w-?k3;*Ez!f?iP%(>kXaFxr1C`uC>2FR|KTT~y+NJ((J2KgL$3tu z{|%CB=dY4`b&|N$NQ_LrIsra3)Dz7ewNPm$fj;x~h;UB_NnIUf^arRPJi z{m5LlcX}$mtt-ZLG70ST?uYm}TZ5iAxrPVQw$lqu`=}tl7iY#6Vt7V2`klRpPOc1! 
zO!$cw8>+FZuabA@x=8>2cOH}emE!J-Gn6E@k<`Y6XuCEPJ1QUJq4r9=_i_rol$_14 z-_eii?qzf&-~t+7UdtZ-auWCLF2dY#r_lc3UoN&X9OKVShupjZJOedYV11Tf{ptY5 z#E-7$MGKxjaTt#udVznJ2m{~c#A`N zNW+>sus51YCW%|a%B#BM)7bZfk-Nc-*XjWOQ{H4{zX)v{sU=OD8$tYQCam{723je% zN!Afz>^K_+t_2yOa8!dFNvnpchp!pUmH!}pTY;dfqn#+VT;^PNGR(B;LUiZq3`iOX zBR7qOQTXj|(rbDNhBH5tn7qxPv|xY;Eq+wp=XMNMi%%rYDlOm~69ev3^x+7~;G6hF z(!W)RxE{%_ePKkwu9M{=Ufw3dIoXWdwAYN5|3GSrP;WJa$16=R$FF~zpF=Bow z4ZF^eE(-}d>(C2Qc1w*;+f+_^6(@p5auKPV^q7b*B+Nc-SMq(e0{O8<23WrstTI(6 z!OSs%8TXypYSmI}p0tpL@!>Qg=`&ZU^O6|bDPZTFR?ai^B3AmJ;g*J#Q;oqqAYtC{ zt3F#`rS_J|{=5iW#pLNg#sq2{@R-qjY>9EPb#QfH1p4%h&xHoi#FGed0GVL+?IWQ#Ovy=M8AmmtbyszAecwUQcFb z1;e#9*09}VZ|%o~ohada6{79jx$Aqh$g_9fx!vDaaV7s2d#A5sWWQ_JD%0D8?`HBcY!+5c|#2G|*uNPG9Q@ zKZjHk9kmT*=HD{r<4#f~MLjetTU7hEdofH92AD-K~!*X z?ZbEv-e8?Jd~Xw?_V*CY5{KdGE)K=*DYL?A56*Yc!CjjE=$1MIO=mgb>hX_hOw?nT zwnK#0Z8-_8U3KX9oFO~gu8|+T9E787sI}A2DO}YX~$k=SyQ`k$@A1tE@FP_kbN2XN3 z&Y&C04M?cjKAJP-3z0fMg%)foC2B)1^l9@*Y^k z=jlg0XU)ZL;)>94-G?rA4j>*+dPw(6Z=(3Yn3a3B0{)6mhGE``#)L&N3vUOb*^h1X zPK_5GJI$XfP~OhIcuW`{`!@3Mhz;#*)1~jHkFPuT$A_NYRzrGOd3xJ_8Eu}Mc=bt& z7Aj;UQ{`K>I61?SuHP5ScwbB5^-b=Aajsfj_lN?2elfw+=`M7~$4TtA`zMIlJ8{

3r8j?9kuMIsQx|%S`f^jW-*}C7X3% z4(lMSm5@?)0_hIt$Xk+0vK*H|m%J%(llx%6*@&2|b*)|1JV=5j)svW!AWpMhjx#hD zB{rV_;M~H|TABQj99i)Q;#ZAhnj1}ts_t@9Jk&(&TyK)1cscbf zsdF)4bZ4C+L0{yU8wbM4@RwXp+N>RdswKJj$%e!xdI7WvD7^hD#-{ zV5l+)l1A6rdr}?rRqLU}#?xrhx*{H2uVgSz=IzY=iaPk+G@Ueo3;5itgodz4PP$r@lxK*e zNf%%H?q4GqJXyd{=^G#va;WBj-*pm}7((QX$1y^#e-}xlghSZ`2VyXA86p&Yxghxn zf#I_^aOwF4u5Z&gdU;$4@s?~O9?LSBj_krwd_RS&v+Ml2;!~l^v{uXolQj<@M%Eb`p4D^X7YdIKDesBT(S;yxZb(F5 zjgS{xTVQpg7sNeGBh4ZC47H3WNy>R7!uk%=8u$+6R!h*+&5L3B@G0_v?S^-sUJ`$5 z2old8GQ*}tjOyo9sG7c+417~#Qj5w+qmd(#3mRhjl|I0lNhw5hYdp93VkHO#wJ#Jp zDbLw#aRE=k02v$_?YnU?@Xxi9e66g6&+EEKhj}45x8DNQ&Z)qJCxH9mJjhD^L*#l} zK*+qm)<>m`=~&(mbzUOa@cjfQbYN#q-IjVLaWISw=>LIbF;|(SCFUgOYXtbzuY`dg zvf#P<2eI37i4$6)!?>2sg4ab=AonGQ`F?wJKA0p+dY|M89zH)q>UNLgTD>2Uyw(IJ zEQN>W@29vCSt+cyO@q3nAx!DEBGTX}gUw>~#Kd8xAZyqU?2Vtn^vYh6VN^`2gpflyru*uO7_OEU(D(mNv%l7C7JqF#V~t2j2!`pP-iye9=8 z3ZQg_Ke4%A4=?R6lF}9bNaMGDkQ40X@GOw1!Dcw^pUouCyh!TSsMf|O>A=1#^~50i z7p4)`#Ah85hA=!y-#=IkCKj`6C#l{h=$i%QJ0gh*+H#ZDj76anf1%^|My_;kKeKgu zBI)ay0J_(mVNE8kyGUuMlM-N-}QNf#w-g5IUAn+t@V>ZY7Z<=%TM6$jGhMHE{++ z6n`WphmTLz&(Typ`-e> zyaa5w6Q-r@v7B>OEum*7lba`Bp_OVi()14~cPt;}sz1=fiAS*R-+r`c?L$e;LOgU@ zm;ajYjZ*pHxM9<4bZ^>;m-#DbG}1}8El9;v|23ohn_=Qp?~E1+Z^_d?pU@$3klgL~ zMlWI?UfS>({TF$o-A-pL8udqvM4!iX@kRJy`a@h`yo?`CAH=}&ViYyqS(_7ckEn_~ z#1~)PFt9NUtE^6>kC773kgjnZC8C z!@hZODn?96Pz zJ(pi#^@?G%zVn$Zd>={524%qSPzi0kbC3VE--0ijdWX=^6x6CLz|HTsk7`(_XtE%S z*9mS!q2?02JXnTdRgKs^_Yn?62hiByM4V1v;#I##7-2CMDn6W`D|QL-QHQfoQ=^)i z{JX&j9HKEZy#lY)?cptrqhN(-2Od9`gx$NOF)X`)DLir=zE;Q6#nMiw`)oIkwkv#> zln8cR7ja6j9o`!whVR#`!=BJ0@ChD{HdpG1u>n!<%PQB0X|7^CMd#a*!)ka$xM z&n0)kr0#q&C+!sIXfMM_!%4I%ZxfEwtp;w(Yz)W?#{iS3pw`0DeTPor{7qt1WkC;fLdGs!}|_lRQy*p&J0P# zWk-roF47yT6{L645k`tKrDkTu>!9si8Q?Ay|` z=U67%9yvg_#|mSc?*BE}WoYVu37>IM=&h)Q4r0Q%Kkz*EOrDMt{N1qQ`87QKHVx04 zorQ(5V^Du@Htw^YkJ1t=u{7=y&if%lH@3(zEx)efy-_T8F^j>R&wtTq_HHaSDM6Fv zRY>*1sE(uDDE>=^4NX(=-3v9WON}OXE8I|Koh^Ev6UN^*voXIn7%j%%=a#>|h=Lr3 
z7r!~eS&r)RY{vxRb}1UoM20{?qXyNyuS2w>0=^y+@Ztwg@GZ(|0!Kq0mxoQl_D(tU z^I)+x^9tyO55SVmt1u^xALUm~@e+Dr$?;G4cv=W*?dbvYf5(}+-QohzsuXH)Yc)Od zR1-t**kFeK0GYEx45ig=u&-SK*B{M;stbNN^2v=a^|S(i!?2YeW~A7}KBz@P2wM*RVZ&EJkgW;)4Z# zsOT!nzj=8Qqh@=M13wZ_z&(XyCPC=k5R4x-xxxCRJvh_o4&2#40YlU~Aw5=_m0uEr zDG&@(mOJC$yE*t;dn%id-$SnSm!qy%D6XCQ2P*e$q;@?#*l$0Ay&FckdRG?Lw5-O- zEiyRO@I0C4e2NjQoC|AxpMlJ)aM+;bgEudh^3s~2VAsqt!bBMBKl~-C``&`On>znl zaV!p&yyT7;)xh4Hv2bzTGH6<;1X7ul?7m}!ySMFQqIJs=pP6GAFNT6ut7y<$HGDKx z0S|2Auz5~7KJREIX|^_q>{)zjE{1slGB`6o8u!oih9N-aC$Oe2eh+N@-mGXD?P;C(yl_#yDkEGvAo zK7hSF>%rIVJPvi|;Owb-SSNM}GnI0Q{h4f{FSHRq%&#MRcizQ#_lcA|4MF7>c^Iy( zf>p}0(eAspTWzwA7GN`Lb~5;9)D-dZk#N;2Lm`u%vkM@rr?fe zLr#&;TIn=WArbH0o`UaxZ^t@=si-yTWBK3_Lbd7|aoH?HCogw8(z~5zZ!)HD6#wJx zr)lFqxkYej??rGuDM6Em)$mqW5Z`Rr3aOWZP)h1H{gq>i<^fYN@0U6jtD4~xjzRh6 zGq_{oLVV7pqm9-eF`2&}O}`6aXI=#s(ry^ByMnLPk+)5iK=BL3_`TPMQ<D07uM|~ug zqS7erTtsd}W#ORXWK3VY73Uv4jN^P&a9Xwnuj=cJtIYLjgmft~nj7(pdKgOFn}VmK zLh(iVSpM~pH!i$>fE=8mgwwSy;+i@|cG|f@^zZY>D-+IQ@beg4;3M=MWRgJK&xv!xyfL%TIw*;TNhfrytMP{>Gyp@8BU_3HI${7Ca0~a7#=vUS5%d=}RZ`Bil=H zMY9qs__c-NpFLo{hHkC-8%H6#f3?F-HSM*jJ;zvFuGAw2XD5^@fS$ zib(=rk!(O;znV#fUii_+H&f}_k-L2M%1!iqY$XxdzlV-I-9TT?cBWFj)-1~S(|dD- zNmgbSJ+Wa0v6^X%Z5C(AQ(s|v6h4h|qiR(8xFY#%GlMQ0K26(}AEL9A6lwM1Ei`p! 
z8@!HDVrvqnk>K?@^mp7koFu*hgcXBnZICi0Y9e(0?`veWg(1H<-kpux7DFt_1ys5} zg^mb~N0&+VlrH^20{7d{qb-W8yj2A$cQ1v6j0(bQ`B9Orq5P18J-L-(Mt^U0r(@sF zqjqBPG;pmhIjC6*;xBh_!yZd;N%43(WBf|y`6hqrc55SlJz@{h$k~Za0J#PekuakM>5XE;q^{W85tTflDWyeE^C-ZP`BC(Nm!DwHi(jHOCr zh3LVRdB8U`vLsrQ@9I;nlU;e3Mu?=+U3w)%q+FN(^;L%62Q$_*XB=%2nn1l`H{h+) znq)?^G##9-N0RsaBNsPY5XH$ZMDln$cX(lA?Yxj>_~y=Jfo)9`^fuQ+{fF(4<2?uF zc?(eQNk8d~i6U%!D38y(n4nL+e0SFuNSr#8PTg{xtrYLjV`-D z%LFOlmL-96LlxMLQNIw?SO*EO8ihQ`Gv(&v#)@1r zFS-x5EB0`IBmbkbZ{{)u7TZqaU%fD(5YnOPIqp zx~Y-QO@+)7?I#$%U<)1Tyhy*U6{6Mj82`&kgCZ2Sn!D~oh-y#=0?#EQ#aFYeOY>O_yTofV{uS2nCfgg4^5f!?5a3rnm(nL z8uoh9!htX3TR{n6;5=@?vcE`a>*xu#JfHThg!lig9uE0Cd`3psERn=!vbz z=xEcUFJ{XUyPc7|qE{7B$^C@&1o4(*3)&N#*)f zswf{r+n<{X_SM?cnJSZMa7ql-Ta-t&8`H>x@$&468_M*8uL}F|tO9$*ynse8>w(|P zhq=MdOt5m5VUOHVV)t(qW&`Gapu_G_v`dA@KV}RGF!~1tN-OEix`>)nCCO4k{TP8wg zt<1*7Nv)8AN;qzun^G5?^`DxKl zsP35AG`#l*{pNm$Zu_%e;aZ9HydSMes?1kI`@X6($h5SeCbNFQ?~%yx920&$L40Bfa`=BHzAsBLCf!rw$L5SfzU@ti-Ch z?7kdX_Unc&I(tD7?9{m9CLh;-`kcq<60C z@)2>l)UR2bKeOUC6|R%!ul^@q_vZRF`rhCcz1tzp$3DM9tMpoENz+X_*Xjf9zTZie zP!ew#exwg_68P@Dg|uvEH{9NHhs|?pq<1nK>GVba@f#DR_<77eV(V8yukIP5>x`$c zVwX(VwH?3c?A6k|c}*_%-ns?q4sEo?<}021?lv8(FUOys&GBWn^J!-505uE{!LO~4 z=u7KJ*1S@ce;G}w*1RTKs+&&#=2uYX+y=NldKS7rzf!j1Cz>dA)B8>z=;P&ORNioO zkH%8^V*7oxAscz&TM>BA&J5FvCHVb!zrg%6;kbWwEAgEEk8QuTinVTNq?^2dQRj?C zzN$NjRUA+0uC#W#xIvM3`cX|ulo<7W_?ODmKcJUGm3d~3C_m3pj2C-5ssdfQM(u({ z`87i7{2#fEe3_4Oo$J)AyiRO4{pB{cPIg5k^eXnDShphozHK)zUMP<1o@LSfGwZnF z1!oy2g)pMoaY zDhN_cCJ|>w^$)d$;CbXNI1T8L)i2sPW}y)&t?LKP{dIz6Vnxj9SFcD;^A?hxs>?Lk zZ-S=kUodj97`CiYBod$efcY{^oKIU3h4(xJ6hy(m8c$;PUWm~&m1TN{I3l|35yVt} z0K2I&+^5*9M7714>;EMWk0uvFmQw|p_@V|h_cYZu{D}kAYJI5Fk{~(bYDV>%QH{7S zTTnWwij=vmAbl?$b6N_8Wc(O4((qwEp^j@vZ=3?7S(VIm>1cqfWB^x?RRXdO;VVQzU5IuJk3Q@0i22kw z2F=~R!hlXLC!R?mNbM-o@L?y3_f;p4MlQnQ#iBUr0>`b)ItQ-H?3pBk^PJX$Gw|Wf zDRMpX4IK6Ag`dB^!r!V?uH?o9-c<0M%zNm^g-qEEn(e!okIf99+4hIL>p4Mm_ui{< zx#$J%k*cG!&{Y2Vr}M;JTL@Gwo3O`4ju^ib;Xh8R25FyE_TdQuUViOG!V~22KP7EE 
zv?qY1H4otSW)XZpX*^C}eStqTu84X5TZk-NW=wiIgmL1rT=*sX-8jwRDzRU23vM)7 z*G#I^rJAO`JhvhX)?C`gh_{IukH0PheO^6alN3kt8}AacF>(}I6Cr7l3%U7n3b82; zV2rGP!=TALV(9gT*;+bG5>21NM0Y-{i`&zC2NwEuHsN@Z-r)&!{kcRN5+1| zOb8mAK=K9?NQhn<{1eEb`MP$d`erge@Ysxv-1e4!ikiVDe)-C5Sty4|S;tA7|Gc&78T`;@HQcc9ree~ zNE*$3Ih|&fE{T9Os~>@1rzd>veMN6X1fas?b1?iI`I$kxadU;+s9*msH01@b>s&|Z zHK#Q2OLV~dlU%UnRXVpJW+i90JOTt7gPeog3)HLM&ia%H89%Gr3B{MwYdo99u&b$p zQ=Q3RM#XoC5g8A5U(+#eQ#$4JdE=?;W#IOxXX@UW6};&m1#Gf$#c|wTP~3WiO1L`U zfc{|yuSUQEsad0#F^eAEJqFLce*?dZKXJ_DlfbQh24*uevDe`ey)n~)_4(*S9&6f- z;>=UfcVCA(A6DekmEY1`lG^wodlJ7ndC@3l5}@o{c@*1N10J6LYMVVa;D`8Ir2B^m z%Pl_*BdZ1kAA{w5%3?*1c-L;jOkpK}4+x7;-_E?S25%$S6B`(!|L*AQrLh(QgprR1_&7u^5y znt6BoI3Yq!blWc-V6^lQN;E+ye+Xo)$8+OfB(o0FzjCvuk3h7~RJLnxJ;=YG#WtUO z!TFAPMMM%WVa01H64uua8<{TKX8D$ z+m-U>vlYp^zxMp(3q2&*{Ups+5n;OJ4x#z#A7EoSljQf`fmF9S@L2m8Q@Cy-PMq}t z&5m}1s=F-cf1HTL>SHj*dNfOM;~FR~r0~2+8l`8?M@U|S+btYX?&v&Xk<$#;JtYW6 z3$XF~ds_7(zs70z4IFY@gImUzLA0+fN@RBcmJ6dya)luKL=$&8x*bv;h4Io$Z^EPR z|3Gc-3+DCj8m>U6i8!y@M6Pe1iH?D7;BKdg3AG%Z7F&Sx2MCk8mvT1SKZ1z)e6+e} zf=_*e$h#aBRMZ40nI}QEmG~0N2LrWT>?`+F`1<`a1lbm%!{@afYad9tX9DNnGMI z4f;U;In;bKz_d-8XxJ75GLD^e>$XU!a*xDQ&sX6a3nirSCA4Smc-ofagVj=owC(QeqkmLy+B-aXN(P-gX zV%FhD=WDjo!=oCy-*z*)QsFdncC0im$)3m^k1C>yMZ)Nm9l|*Ow+MHD7@c$UIgyF3 zC+pT7;YW6skYvda_~b2xniK8{mI^K8buK-CmYyLv(lQ49>IQ2=|N92FZcL-Xq6M7M zEk`iD6pyWztKe4eDN>XpLcU#l#SY~?!qSm*L{ZKc4%p2lb`wttHcQmvj-|Iq-WW5e zlQ*T6M{4nsc`DhIH^Th)Rs_9T;)%Vt71{mb2k?=xBt3IJ*h-0``n+mp^I@UUG)V{{ zXF4HVPlr$Ys)A+y&EPEG3|pVd@Yh54!L6s3kn*;gd|9!VyOJ--XGw1W4WU-@bnD~V zei3)Lqjmydm$nx-8M zhAU-pTwdG;epZABZREbt8FMD^52n6{)7#46S)~o=9k^So^hXG{JT|BAPCSDiy}9IM z;|-J$guv~@*Mk3|E1`x&~rmtw#T6RNye zhz(WEfGM9Q;O`@PXfGAXlpmW2iw7^jGqG}5G(8$)ytFawmIArUb~B0&<8XbnBmYh7 zAntWN1MU9>z>OoT;d5X)_>V22dHs6)e~waog6;_Po*##%#!|RNRi55&euQUse#MR1 zHF zDud2bf7Aam0N#d9#%Y2We5mq>obDEb-xhgbyL*^fmYcyVn5l3L-@lQmhj+oiV>y~P zj6_Uj7TP~iCjbAJu%>d=f;dZ);OR&g>-c36|{!$13XxUa&Jf@F# z_T7W=aW`N~+6oNl9>smrC*X`jKS5Du0)$KTgNl_k``lEHcuDHl{qRpAf-PgQcl7@? 
z@IDGt3ff`cf*>k1PXrvZZAbG>*$h4;qx@|g{48n**V`viXx){XVpTOX5fZ`tc~00l zql>vNs>*+twWA+58^QfuZgkczNt`?ClXt1|!-4c)Fmr_?{ytz#|J`~I^FEvgclRlf zVs3|z&;7?6oIZ-{OSdsM-CJSXIBRUSDui^2ZkS>^gME9+mhLulrv1!vym_P(_IRG< zzu33Xpf?f3MZE!v7b^4bEOcr3pa(x9t%jw$_ri$vEqI%p1W`+rdCQ&;%nFT^+Ilqu z?$wINpu6Wf+^8~v^x843rNO8l&PtZ#Y^cFsc5y)V#DT$we|(*Y7s#!zM`^Fm+*XUD z)MT0frB9!u!-7@tCVv9%_fCP+|Ba^#d2hK1hEwRi8P`WM@M3gPc^UEjG?DD6wWJMO zC5il1F`8i?N754xk^v!m`pQur4z*6D58~c(O@7A&ip$NYhCq+Ho{FV`t8%$$Ia6FU znwc#wkO!|XmZbk`3So;vnA}ed@Y_>`bf_y*{RO|_twV8bxN8P!ud$|L0so`uOyjBg z+Aw@Fha@D4QmKR@MRE4CmV^eB(!Xdfjha*{X+lC0DhVlbMJhz(>}PFh5~Wm1lO%;w zDGHT(_xsh)2j}c_thMgvy05if-1`;4SLYD!$vh*xE#{g#B-Ym+m~F?y+)mL|G@@nI?3{%;~l88cmW;_K{^2|pf_@A+bno|%)}KaG5)#uu#fIz$#9yG+>r z;V>~_JnL~-iO!d*$RNIg``s_wMo;!C}Vjej)e{!?bpdp0nu_8g2_oXGw3Rc2on zt%XvX@hm$NnDvT{Z1(=kU=mluE|>+Po{WG^OEhJdW0|aFlW7%#t@}OZ>$<3A!Ge^VC}XTb|WVqoYRK!qd)rK{qqU< zY1>~&Is66g76!3t``kfs+XHUv>)CK|O(@q?c#l0(Ok;*`3;5_kgXmMwQrWxzsI14m z3IY#KM0uGq>pcD*{p1oUENozZaiqcYj{y$_kK5&@eHZIIYJ z=3ibmhI-qnn7?NrNz5KWW~o)ehLi@_tn5Pip7djCCmqB+x(3;JVKDix@GU8tb{>8X zb%9pazw1#T?;IPTDbBrPXLFlsE`kkJVBT>aWI_K~ zP^M-@KCNDlo!i2=C1=vvX$xKQ;;Y!VT|bw+%~->aPdE?tQ?7D)SR`B7vW?YO+K7D8 zNX+~=hPeFKm&6wCWMk5kxeZU4G_iIv8Foy87>Qjx+jlA?YL_=z?VAeu(y>H-)kM;z zDRQ5VGg;AOFEW48e*Q#o15A|V!@$`xeE#wThWj5S<*iAw`3p2jVV4m;%yO95m&uC$9Ebmgi#ml`MBwRIJ=(#(>!j-^3~<=kN*Vr^^zmo za6`(f9TR1$Ca-Wxsy=J>4@Z0RzHFq|Up%&oGH^otv&caLVN zhgxB0)nW2`bT&rs$;aJ#&bVaZ30!A9fUOA_z(zK`!+(W@{S(*HabCfo*58ONT;NPP z^n)OLnlm#xV9%N@3b+kvL)b3!JJ47SWSRXtyyG>TSe@Ap{=reiV=iTD#tnh_!X@s; zy&K^AWfjM}|AoMj=HmLj4B9`Q;{1P1X1fhlpe9O#)eKZ)eu3saddDJ$Jaz+QY&2|=is1z12kzoh0s1pWM%KUNZzl&XZ6nT;KpMpe4z|l)d?a`@Ryey{tt6s`w3Ir zCGb3L1F?{c6xY?w7}LP9@Ku?(_Um?;t4T6$9k5o~7WtPo|5|{XTVBZyx3mdqk-kLz zTrjNcxFciYKHZo5$h9fmz=X*Q;ONvs&LOdlFETL@QgkP?znku%;SVKJ5|Gbb=v6D6 zGxLM!%c)p1Cjy^N*bG|VPr{k_gP;-n6BnDU2D#`^Hm>~$wre`!!bwkH%HaF3s@(!D zHhdL`R~cEn$&Qo_oeK*`e#9m9r9zyF4=qT&CM54QAU{Wqg}MRv$d<%fF}p5epC)F( 
zjMqNURCNYof4s()z3GC-x)^e{hC=JDi?H760^e!90;@yBUP8b!nCQ9$w`NXBs&64=#;(^)Z`To`Xpuq`E(Cz<)lcx_(O7caKMrC~ zP9)zR^(KQa48!%$Ldb%~K4i>^c(QBA2;uP3P{av-HcS_JnU#|anP2M^m8!)eXea5E)|u2TyJgIP<+ z2!(j?NInFg8l>cYLmV*-olaw7b`ck;A?pb}L&jX-spa7UYEI&WpD|-eTmBOmb~;y> zKJS;b{7yT&c3FX>)t>~*&?GW?!z|M0=K|sGFo9jXr$P=u06BX21WEBTCW-g^k`ua0 z)bLRZ*goz}4%R;qYHL;t{_k>xKC8m%(9Py#@+NoT^~WDDEA0l9MV%w7hXx4qI$Kby z<0Y5y;p@TV>j+aa+sl;G z*gS*mi(LW<=~?VYP$YOSvL|swhrYiaLNa{xiS7VNR@7kD`)6)Zov!5qs)q}9}l zOv z+-C>RR}*!$a`wAvF}ancORv=q6|PP?#dge*l3wdwU{G=yH~K*k>mQs&?$u1T+gDry z-*>x_%ljHQBb6j*UJ*_zbTXl=`ye#9l#zKEtt86!7-;KT2}iU-$n5p~gdLG1an9TV zMs)Sa@dRgK5E9_M&Om;a~XfT7#_5VC8yUG|Vh zs4hCj?HiJa)jOW@XRcd;=e-@6-n5TdO?5(@B{kUCevzog{(#V^z4W_XAXM(WKp!g_ zW7PUY$Y0n6%jfjR@k;%OwlI{X!Q4`2z8`GD=$8tKrv`G5B%o zQ1IvpMwy)0^|4mNptc?GyCi~9ybCRB|3c>NHre>4k!;}ZiP+|y2-kEfQCakxRKAJD zwv4CTn7Lc9F@6@Fd%m88%}RiM9Zk@G{y;LLj~}0KYBK-LX*p5T97T4FdIAnVPmoVO zNw~5lm!v9rvlqVtnbSsR@Jm_+ind4iy4^Bn2;X4CaNwK7E(1?Gs1ZS8fN5jB=*AF5Qa$`U?w3Y0@7ptB&kQ3P zGoBNfRDBY7L5FM+9Uzg(X>ckjn={T#XOCp1V5QK5J*fp)p6|`u4|~OVZCnUj9jZ`% zy2$=&_7glmm&u&|C=;d0Vb~Tk53Uje((^T)xqRG-<*5p=EYX4X-82d`)n||gxo1$- zDT~gXhNQ(k1f%VvVX{g-b|eJC(LcdtxbqP#{ZfgUz09~1(|L6Bw}DBf+4y{9HE1f$ z2Wxj5^gN`>S4!8ws!~njG&@2lO&vs=JP*U-)+bzfN^cS}?gY6X9?Z|I)Fy44-{6sj z=g6tr-;mNd7FON)Dr;F9ha-1x!l{c|WZf0lrKKO5A?eIk(28t^HBC36_~QWD=95gm zRjwl^<{!kSp08xUv@9??bdJz-+gOQ1G+ESpBcX5+<7{?fuMykqw(ij-eO(X1{38NJ z?=B~)p9GTLUdQ*`uZG`4%}`o_TWU3&lYBKjl+}$(*r52dcz?>EA=Ha zE;whmPwto?>lcdG!oAsz$=6|89}}TzWhN>p5-7l$W(W4nN#Tlm8IBHiMEV!#t zs_!(Lj+3d;_4y-ctzio>EgeI@H#*R?)0#B=urY1At}nWj5(UMkadcrEp?>2O1ciy_ zbi&Fvq-&rFHIO~Rkd@Zp8m&&}XO5+Ve@SVn*BIJ<-%If5H;R@u^rzFUhX`-$HJO6S zbZUHT7TxsSQP2?`URKGI>BqiWbZEIHRobFQHv}!D!Pj@wx~IdaW!(kgr~g!{q!dC- z^~Tc7G-s-OONSoVQN*&+XVL$&RBB?5qq5$BzTZ88UJCG_R?}RlZ0cxwCw>ThZaa-$ zHJ>0%^L-}l*HjZmze*9ht+lA7!&YHhl^k>K?oHh$#gjRGwh1|2Y0Po1q97ODgpczU z31q-5+V`d%eQxGR^}kl&;dyTagY{FX-Z5>eFhi3T9Mz|rrn=IJwt>Qw_3qSvjXJgF zA_SIiDKy@$U{B?ph1w|tsrpPx(^~eU$C`J-_VhV)cvLr&Je*w22lb<)JOb&bVg6vW 
z^%L4Wa$=K5B?$ek^kBKX8=q<=WpO!L%>LmIQrB{wzm%nnBYtW@+>Ti=XRRD>6*HBX znhilkF_V3^Ww-G2(@^e}Z@!p<`iE1!LhTZKiROMQZ$WoV6z*2A$@)gzd5ZOKYu#SW{Ul8wE_E~ zuy!irX1Kz?d-Lec?0&3zXfR*w@RNTxZ6ICy)Km169E0~x-8iOT1I~)}pc|5mxU$T- z=y}e8rOFRwwXga!Lnihdo*oo(0_=rJK9|_#jk@f8iZ*jn)&kqiNa`te(*j;NNhhYA z5nc_xF1#u|CLEp>%qFUgVe@}iLq&oWdy>W*W=>Tp8NWWjt;AR+1ULffFla3Z>#l`Sd6m1`CV8d{F9@5MD?(x6#T zQ)|Qid*VcuPRh{~r%z;M`){)LEKosSmA>xMp-l!lbXD^QGEdQxidiaM>eY*yM7||& zw|^$f?6jy&=?F6DrUosZDL(U3Wvt71ND4D-iEF`KLOT=rZA#|C`%+!n@4{WO)K!bV zih4^rVqW5J@|zUKloFZYX>wxne8DAGg}Q1r6RU^8Lj1#@&^NO`jSe14&kcA+?rQar z)dO_s=i9uH*T<0N_V;1ui$~Kx&us;RXF62jkp`X6BcbYXH-%~G!)fi|3?R22kff2L zXxNEfFyXQx&Dz*bu1*?EEw}T;zke0kCOZb4p0*HX{TKBocCe(*{`8bO3T~1M?2Sjf z@LGw|iRRClj>!Sxuv-PO>~AKxD7XkpA}6wEt%Pc3NGQA5hQ~q|k@zQl==*u!$lcyA ziN?flB>43sqWj}I_WV|&{ZsqU$#5wDzr$i6K({^v0+vxWfe()$0OSL!JpM>b&fo# z4d{ZyuNfp55t}jp$46Gwr zck&PDnl^@8y5WS(2jvBjtTSkz4&*@pue zg5!tB*z4ppqI|so8`4Gyt--H(z52c+wYnMj;swODE1a}k7!5Y}J^0!4x0A<7^2A3x zUoSZ}ms_Jz$a$-8CZ&gZaZ6S&CmT2IBq7h2l0FL#3Hz1%NedSTa zy`#nn-p58T@0mAYLdRv1`5Z<6$%2Hnm9cDpRyue1i76?miN_!Jn@QI&Z+@YL9nrSh zOf33G67PK}Ou48CT6Ya3@>+X{P17iD-pg5JvF|wIZ&iRpI!wr~LPMf|FbVHyIg+QH zD}<$8gV^Ns!P2}wr6^gNQetW|pEj5^K(hWR;p%RC{O`a_GA`pLF>9U1wi)hYO2Zx4 z(mes}{6u+NoPMA8TC2!LOzexJgTk3~Ocs|I-hl2$D`l)H03CHtOLKCrV(i*&Z1h`Q zR_m%y%|3MU%WpIj->Y$)EItSJytfsS&)Z^9?NI!y_yo7*D6za=>TvNvEH_%rM52g{ z4DK8+sEK|QeUBRcN6rKilQx}Sua|?jY8|;vtFGaRfirP|%29T1qC1(Iwv(k>dg~a2HY6>J|GOG?cCV^cfwz?O55Q z8#wElA3OKv3Ou}&K~BtADfF&?!oSo$hWEAB6C*1FNeoSti(7CZ#&Gug^$I4O-CoW@-UA7 zRJwvy^qDG1-?V_Mg+Kb6gh5Kn2YjO1!tB*C zcZp>cQ49!FA*CA4%yQanxW4ic8}U@^w>oLEGiRNpF#&O~W!Fvc>Z#%u_~pR1ZHI`R z=1NR|Bqh4bFOlpoB`nXjH{pC9!O?&z=s4y(mMH^-|D*7F<0d$`atMj_yvCM|J3~JH zGa*7{3~7(6l;xgQrJ;WZlB#*8CC|Svz@bA-1fLJ0+o)>`s>~LV2Hy{`Qym!0G9b&_ zw20brH7LGb4RQS_v3z+0UN&D~*A}!8hrl(EaI!Bs94m5V!BU~dbw;Vj?$dN*z-Z8T z8BLble87zJLj_4W&mZ)ckiB|}#9Zl&G;Oc5n35_BDfj!6DW*@j|9|do>aXM$W9c=n zCy(+`&$!<2zsVHiR|uP$^yu8tsmvf|q~P4LPt4A>N>`hYhN;UX^y|b2uM$j332Eo8bO7y?XPkffdKrlAggdOeSxYe)$CfTZx 
z8E><3)OS<%UrIY1%&vx@fPr-P$f;Oa+8?~`6~n@V^EuD)J76bgjy5@1|FPeHAEQP51wLWSJ2uKeMwTxR;uo#`Vt0M#HPCs_@eRX&NZ-64 z%SIZX<9`)u;DvjKU_td%DDsO&8~PZ&F1-VeMz`RGMGyDfDjast&qO)D0(k3~2g>>N zplMNn`Kv|Vqpve=YSbo?ZJ%M^x&UrYS27;^e1uokTgR-t)riV@M^-(&kgNS#z%&D9 za3@AZ@)sUW!?~{O@Q}k_EdG6)tC)6xyZ>tZWIyJ|Kc9?2tfE zp6HQzW{;8QZ1J~M8Xp{YhG+$Pi@d!&F+Qos?-*i);Wsu&*GOt{gd~~|Ii-iBPYrLi zsSOV0cVmM47IX{Vjn-l7VAMu;J2kbJpxAX8f-=%@mxm#qSn?nF{;wK*UF^~3-b(T^ zayFc_b|Z<7FG0I376WDvBqwJ1lAmr@;OKIDw&=!pe(U4_GUU=Dv^YwoExYgX$CT%h z;E97+&DkZ`Eh0FNY^MVKQ_9^v@q#aZx`$jIdPi2HItgdyY$4;M1yB}I344b2=NuaH zxB-`a*$VeNV0liQFInHl-5njw;j9&=n;MeZB}cGJd`7DOnd9vQ1>$HCj0K`wMrU^t zq?}*RwI27yjq~F$XUHOS|FVHEzgY~qwO6>dxT&1Pj_{W3I;Mneqg zEBh$^S#JUw1-I~~ngNy{S7r6KxA5#AasHc)@S^KPQnfhQeeZn<;iXR4Y(}%7S}j%B}ht5ic)(1^4S{a!Dz=tss7I=7~<2#YmzZg zyiA6BHICwhs*Aj?AQ*?$9)hlZ@}$S}1a^;+u(tD`aGB`eQOVkc5+65dOw2sK)U+SyCO^aG1uMik zZ8N-j>kBG%-k@nyfRgg1CGl%Epr+F(?p5A&l;pg|@Q=Uw4u#ENF0!3%bpyEttAAqY z?6+KE^eYVitIi@-GU4ZgFbu31&zC(bK;IpzB<+Sgv0nNQeS?%poq@?mEn{G1m^3YYQv^Lmpf1sSkXbbXh8wuYy# zPs5ydifrxja&CstboAE_;~tK0nU|I=U@WH%)P-a zQ?`SS#<5`9Enq`uD(=~72rI|e^Iu-R0HH^X)vr_JZr$!cUsE-#zk3?&Gg74S-CCgd z_Xqw`EyF)*!%)*^GqY(PF3y}2o_ zU^k?rdLH4;KC3XGbp|$C*Pv#~ChjRk)U1Ai%}q;igRa`?aDGcVFX^3A;+TCME&S^-WaC|NjgICEW^G5S&+*7# z7N4PiEm~?F;dnuuKW_aAUA>jzypbj-%zF%@<9_14qAK)?NI=huIU;|hf;X>?1V`=l zkQ;r5n-K66rd6NdT0cbcE64tUNfS?S(@s@j$Kd1Q`&$5r_dj)iCO` z9E&yWkB>b2Fkk2Q{4~8+oYB8TkkoyKWzrxntm7RPdq$#0WgX0N(-XNJG zm08^#j*=UNoQYEp7ZDu_N*m*#d}B2K@WePiCHtJT&-;gXaZ5Pbt&N68(RHBsehU9| z?+`ZmYaO}_?a!`c)v&^JZ>;IqgbAM0p?hi&{CL^`^V;3Hif&K%6h43*T0dS~Q_7_O z#ns?l8#$J;=?T1V{|0YoZNn)uKfxj0@BF*TfkY}3X9B5Bxah|%E~ZhH{fnFlchzp8 z|7y|ix^FKm`LTwpRartN+`G!ZcsoMsJK+rI?O6jY;|1V(Tb!z%i{m5)P}%v1OO3E3 z$24NVIBy9t+Hnr1Tm0s4MqlEV?@L36;t}9yu7~4C1*6R?dA3OFFoxy~WwDEng2eJN zuK6|r?2|6z+wN;n=p6%ZjTHF@vSe98L=KsDF9`OG$_MVOCtEb0a*h`bSx|uvsql&= zhdd_2qN@YRo{a;^`|xlQ^`IJT3dYJznQLj$A>89a)WrN4DY4wrVKboXXaO1jDtICSGpJ0rblF2jlTG zez+3EeWzJueXT0(dQ(sQ#2jGSgA$zo;R6ZnkWiz{`{amw3$b4~mMq_=N53R$&^@Dv 
z(@4X2ddzjb4oJktrW7+W-U)FW@Gr9YXC%yK6B7+B5ku1@3CCrh~GCei= zSTdB(%$HEb1YKJCq@4{Of0rDeqDT*~>tRY)zku_Zb?82{53@X5PGArx7%eU5rYk-s zVeZu=_ErQ5EZrfKT)t1v|Mwa^?uqwy#7-h|ccr3#F@P}rqr|}dC9FPoiA>-23$R1X z0=^p(=ZrygTo~asM!~LnDKT2`PCvCWxfxH9%_K z08%^B8+wN&6RYw#vhlP%+CN@OhCdxfqPF=E5;lko+x8UR%q)hU&r_tu!P?~FJWclP zQxd7!-hf&IN0DB)!pNbkmT)-z7q(ATA^(1ho&70K;9+GYG_)Ng&Vf$k+vAI5-cMsP z)?aiTyH!Bg^?~9H*^ngMJwi60KSOHQtRiVQ3(4;aj*L0#!Vmt|j)N=rGTSdUWTg1J zr+r>BjhX?l-YkKu4%tf9m^u&-vp#H>_kPmdFABalctO^$gSsK5+6?Yw+m8fW-|uO#FU$=t*=O<2qqo75q2tK%JLaUIG=kS%z8x;86+xn+ zD~_Gf%GK0fgaj`qzD_v^R!n^hgH!tP=il10cvVBNn8JZ2uLP|&dU#J5fdv5IT?h*c8^kAk%*Ep$=fe4)m(b5U9;R1`45;2H za^Z3`ysbRV{H;#|8=`~_2TagyuQ@hGjevrugGv4v1H9V0m!tZdpnYx)i!}Cuk?Y0_ z!ISz5F>Vqvqb-uR-D{1fRU-I9`THOvB#a~#DzG2u0M&dV4Byv48qDqxyWaxwh@1qc zs&(o6v2TcW+dYDQw~4y^K)U#i*r)UVL};oUeXjL|IIqy4QSHY`-+nbj!|De)GC2tL z&8{RDeUFf5>s!dG%4nLYFEHEJMZ~WW$)43})FP>clxZoG#Y=vYX`|HWS0eH%4kp4W zrb$m)|0Dy3iXFBRHKPC4fYzHS((qFfYOUQwQci}_e-q7w$wgh{yys}<*`81Ke$WF| z(}DDg&wXOS|0L{L6I;7f%#hEyf#rX-WWi`%D*0YZ(ro)vm%06DO7Ci7eqNbw`czMj zx{P3jO*@1EdtY#d$we&a-~bvl+lpNp(nVsPnNZ8&S77h!^X%Tg!F2f4Y_h#{0vSFz zoK2qBPV!hbxe}{GhtEqV`G;arAnXAq8ku^o@H1hkA)=Zy^S zadNxvfsyWancsh(AgS~rnnW8y(ukwb8gC3n=}jEB;sU>?NbwJ?Dc3Gp697yI6 znPsRh$6X8J)4rsEujMyrdvAjh-linC@EE6%63Ndf5$Esad0?e_7dmZfP_nGIH1_Iv ze8@TB!@R3#_f(0Q`z?pny*07it%u8Vmt$>inOuzGN{AWM4Mt`OGPAo)n7i`^q{0@0v+B;qweg-troek%(@SGRn!gV>oT~4E$z^Mu`-J{?E;-v zT?|Qz!LIS@OtnRox%ZFf{3K=Qr>O}k@0W3Qb*ZAi=?w4tZ8pDsW+uiCZ-bAM9$}pJ zFpPVB0{-kafz_M(;JLq-MX&ZC-u{^v*Zf+Y-Rw>R`)%TD*}fA$tWCv&7uR7$=4;HT zas$cKMA?!3(YV|;5Ch}faYuwIDL#EroaY+DlPeymbV>B|fB1=Qj{A7eJ9BWwj+byq zRl>sE_kn(xgnjS-55t>hAW=L7b!rFT$ypx0_t}owD>Ttsy&LBSuErQ+@ilwj;w$1! 
zQQ!R_1~#`yC55JZhEWn&`RC%?<^3R~rx9KaoB~VAo?wdiKj}qn1s4D5CKr?5i+sqb z#FF!J8W{ zfLK!jBY$s&@Z-J7m%0K_8r6;K-m0<>&l1qxvl}FTZ}S_jgu~XSv0(DP7prxe2cDy@ zh}n~aY~$K`EGxKxUqTvCq3JuH9jZ*0$$rDD>~7w^QBjs(#zUah3SRPEMaFseAg9vJ zkM7rl5g+RzNy!Zq9`u9rdgrm-yc(D3uH(%<-ozJ!E`a@5hULAy_`qA4b`!d~`SW?= z_sOaQmyN09mZhpf%4h|UB;J7^YAS3&)gk!HL{F-L5!j3U@zNVS*EQxFN|toW^eUTC z(?f+Ty_CbHm|Vg76W=jqS|!Z$Ag9n%#j7`*#S@}SPW|2r zI2$z%eJgahJ>|bavY~_TSo0NTip-&7WjV&bw1A{a8Q6~>Ahq9P2jTfDq{DwQ26jrg zW`z&%drAmCHvNear`|zs86P5GpNB5a`c5rayI>M zKx}CtZkQ)#SUR0tww~zOOWMGn?>NRy`*)vvxNrg&qg^a^{zPWTap`LZn43C zJekMZjcnc>@%>4BkJbq%G4=f{mY24R4YM&~iWz;Wc8xVN`7LAThj-w|+1;2P?=NUr z9b|9&d$LjcT~YgO2rE{X0;u? z1j>T!7BGkVC$VDWHL`d6ETK1@%?_KHu<>T?*bn>BY~NmN^0Q-X(9USqF>fC!{P!OV z_Y1;*RkK;vo-Dj(cANdFTg)!K`i&n037g=XiOp^2u#ZDG|0Z=2`P=t_Fn?$Xxg^=m z`rVc%dXlNU$_s09(@UB9WR521<@`w2tU6}n&`U__I7Q;+4>9HYS8$BJ4x=HH=$V76 z!ri7R?7v@z%t|_!)hx;53l`5~6Zc!uyTOOqxvK4~YJ!ar;9$nw`kAp80r#17ffW;o z=+bcc0=0Aeh|!T&e&kVG`s@}%rCqPlJm3XaxbP%>boT`9E1yEm|JXxh4$?o*4RNB@nb_Y3+7Wf4*IS7QR* zxMHZVPg4dxxrww{K9-s$Cey*02k8jQ!!&=H8BN)JgjOp=3VUwF(?NBK>_tl=Wup&? 
zefHyYghWDzx+POhv%C1yDuQNjI7!RXuM^kDr|HN2p>%a=1eGQR(34sxXw|eM!tLJ4 zWOwgVLXvl+&`U3t-q$(L!afCnR=1(xQCLprz1kt{fBTpW(so6Q<2P9G$6Z3zv3>O5 zlUN%4?G$}1&Y|Tx?~>k!j?u6uDKt9l43(@6p${Gg)2^xzGUak0y(T?HpYs|-|M6mC z;rfa$Q7>V)by8{G#!kV$brYZX;{-i+?ErNhca^-kau5eUiKh(@$FP5+2GEQrj&zfb zE}fSDnuIE7(;3~O&*tM)I?s9>Rmn0Y1Ez};z%vu*=~sQJ_km%Q+dhsO{+x~v8>CeC z#VlI=!I+<5;7nVz<_M>)b?8513wonZj&R7vgw8fsq}(DyI@_r)ZTGh#11=Axt3t-n zm$&l>x5Y>Ru%dhZy3q8IiR^a!8{FG5j)ql_rmyE2(=YSwY2OFK>GH4?a`io>WfKjB zLf4se(z86`@@))#>S{n2M>x}{HN%Lt;UKzf_&1sQJ`<`wW)5x2oW_#U2UD#!Ju2y| zLH9P9Q-^D!|6rTBpt9ij=GEsv?BG?@#y6 zu_v-j58?H*GNw1zfx5_#qHhAE^heGnYHU4)*_;_jXEdwQ24X~)SW_CldKis8=1g@u zG--a~Wa{l^B3vDIgUoX3L)|=YvVoR{G@>+$rEuHf`jwINzqq7 z(*?F=<`y=3uo=sCl4rjPvM@Pp9Tvb6=BY4_d$emNW-keZPLq}FQ1*SisxxnmOAw!Y%_4eMUV1PHN=(stJ{e;yzcSt zM^e}+ugQFHhXXp+>5y&J(?l1l=sESV#cAzFh>8A2XkL_I*JQ6=GQ#}>T1ywO%$qOy zpo8yOPGB@n`C7_eUOIp#dE-gXj0r5z*O-mbti*ker(j{(M_GqwC1w|G##>PpDDic} z-ir;GWD-fmT%2GPWH!=Um8OCS_=qQ`OcPRSYzv-Mw}(OMg}~-z>O$SFTGpQ zNPZvq1pZwQh^@~-@K*W-uD7R?nmx~CyVo(iel1xxO?xBB@AM;y%9VC<*Q)WoMG<$% zZX&DJx`eZ~OX%p!T^Q7I8!z<<70S5%Y;<)c_;gY5d3jm7*Cj-FcTtN|n%ydNzd(fW z`)om*KEiZO+@UJZwtt<(dV7dj>VNfk_x2mq^VDNocG|Q2b1uwZ_5{n8DDpkJ&dg@a zWOm?q3JypntWjjpCJnG7;dKjHo=+e?j$Q?en@q4UF$n9@G}*Os1Dq57oa`67{CRJ! 
zL>{7=|E80wQnHOMTW3Jb2uAubR~8Wd7#U6U2Cg<5qOCwd(ReX=J@=QQG2Sr)tWN8}=vZOHv1BR2T&d}gMbNFI$Z z!k5j9*>yJ;Rv4AWYLvCvh<_HuAy~$qxTT4Eg)2m*^B_y_Gl^}OBV~)TqnK%YDBF}R zWln#6*o0X-LDTCjd(bzGsod-)8g5~%m?kpt^+(4P#2w3f2=p5 z>7ko2t^6Mr%q`-b=M`d#&L7?<{2dB>2*xc*MMo1`_&e?y zeDVRUmZ*}rKq;n7{RL}_FF}=d70ieh@4z*-Sw-+^u?O&v@j|S+^8}vI4^XyVLgMR+xayw!2k^{`N+L>+Aeak{~=ag=$vta?hYGIwhuCSc3x7sQ>^5vV zO`iK5W1rW|V<7{!6NiOSC<*gtQL`&pMEWYWI3$YoI&W0EbdR;r_x1&r)T@zXO*z2w zuViCpX&GDO_KwMQEM&v=9ZSFOe#1Twtj5B3j~SVBl(if97s z3CLu9yWZfY1J~i{?OxRGn~Y4!sbm^suhX>(@xt|rMAB7rQTThHisHZgs*qrRQw%>JMzG_C0nb{wOv0SC8TCN@s%UA9g(6A$wG9;hUFQ21 z^`O7|57auWPGUDYLD{da3hMR5GT!I0BO zk+r#;<>ubM36=pHc$40)z?o};aoaPPafS!YKP$MTsmEbi&H>p`*fNPS#kot$o~`_+siQU z>vLIaw;~S9I1gKx^@MoICd_7=L;X%Lh+FCUL*t7G24%1P7N>(?u>^3))h zm~6nbV+M1jZkhOCmOQhJw`W-;O6 !xswBeIdG!<4 zaEZ#Z8|mtdQ+mcf^5_d1yblgL&Ow(_G2ZfjY1ia4oh=%=8SpovqmmoyMf7HDf)`t) z(gR1&eZUL5Zpf4$U6N)lFkt%2joCH9m<(T_%pNlFp3_zvm^nZb9u-HTF^l9cmAIf+ zAdmlu3|=YNz{4fiA$d+OT;VZ}c&#+Y4ar^*qqDKZue};Oza+r&$TR4S)@*m%R#=_> zms8a3;T66~$W*o(l5Y*b_fymHXqPU1pj#3V9@z9+?V?l~w-IU?TS(}@!n=&_AHI;>^+NYt_2$TsCh zu^{=;Y;oHbwpULMhXilr4HOTtT-&{DS-Bh8S}~B_DiPq)g0-yPLmiIfG|N7G7{Z>W z?qVKdW^Gv^_TRr3@jANN?9Ke0csuDkcc@gCjM+YfJ$&{CC38|aonO63=yfw3?y{9V zQJ#R_TgTz&$*Z{Rn-4^&1a&7;`ro>*q-a}TTR#k&N)i+AX#H$b@54}36~y|Xib zb54t3$4@U@5j%*<``M7x8W*zrEl2K1(@EKQN|^LDfa@3Zd$Oj zqc36k5<50|&qB6;?Qjfwx&^+_cN=<$=Lg^4ZD{R3wvshVv_r8X73gUrQ`ZY z6;40pPRe;QrzROD#Z5*1u_xJ*|CCu*_*tBb$zcD*K$@~J6Oz8ELqJXh6hyZ|M#31F z)c-nYKHbj6-$@3);4`S-bq=>m#qaN|f&8mGGSG9n2yF-I(6jO`Jav&HiM7G7Or{UM zk4JMlKK3AqO13o(m4VNuleqA+gq_dPd9;sZoCO8ug5u%M|oDV-fFyO-Y=+1Os} zjIx}_%Jsq0Ew{Kuz0>*9L6c;E@Eh;!M?-V6RbuIal<-xceWJ z8uY~vZ{9*qgM>Ya71_nTg%F&qfk_*mLV3+YzHMoLn6`H*jEg#fjobHQ;JP>*e7!$D z*^>kQKgL4Zz~d0#ubB4@o(55d3(?gt3}dAL^%1YRY1>buqiPGUdHFZo+wd3n^~!^? 
z(YL^)>McZlIu0R!IMCJYL4(|Cp1f2>KBf%+HavrsBd=jf&I74rMyc$hraEzQ{e_a= zzoo?woY2)mfmrHYfMxV2-&Xh?^v4&YME;{x?}wOw>${3SKLSg74G=q>p}ge39T@Hs z3S-Ck;?^~h@Qcd@|N3?Oz3q=7d~^i(J{^Mh;xE9Dxq)aHejMdWtg)`YI;lM|1FEu4 zL)+d4czEU+pR?E(dQMkktC$O4`nv{w-yGxJ&;Nj9b&ufc*;HuWx)g4{u!RD@T5!J- ziESSW`T8@9p#D~a^rnTFZ}+>$xu;y?Iy~|qYe4T@m{rkAMjd#IaV>Oq1U%Xd3NCD@S zDB`kcDR}g`iOW)kb4L6Ba5}Ss_~zq(VM(4UiMsX(+oo{b9C?@qrMU%BQna0P!W=% zRK|VIm1HChDU_sADMDJ(`rY6EUS8MB9p^dcbKY+cPO48}HC&ucKjto>s@e7=;Pf4m zSFxEB>6IgOX^l*Ahb$^AHlY%>wdC)y4RmN_HW^4eLTr2!E411lGDDiv=nLH{>X`AE zGYW~PZo%hCWr1A9IV>SLwTX1u8^PDvmP-zqD`NQ{HENTyiahidayDwRv|_&u_HC>r zA&>Hi+j;}qTHs6{l1-%bn;2wf$z!{)yKOq*9<{sph-NvjpaC1M3;)v#bYOE5{rdD6 zIek(TL}uwS4=q%<%AsoOl+^dkf!X4)_H#WEIlh?&#blLdx2z^AUy3<*feT|YQiB+k z94A4;14Jc6j-1d;CXQo;?8UOZMARsV`t_ZsTX-cB;&z{|m9Y@c13^M24z2Gm6~|q< zY205CRl4QfRuXM4L#zxsI4zreqR{V2Y^7x3p2Q$M7512Taz)_Z5A!1@3lt3aK>w zLb}d1a!XTxk}g_F+ULs>$Czqjm#u&@k;k|VZTlHHBT+0pHI76~>St8FG>0DfBxtQog@pA<+N&EyD(kG7=$oOW%SXu7&E~nPcXh%UL>!ZXjas5u$Vsm zeTP2W0}Nxgi!Pl0liExduBX0}k@R~>WgSPcxKWWUQ2Ymns{g|F89DIs$4pi}ZzOBb zKZ7k@F;3`-PlNdnCE12QW7*FzgY|WkW2+5Ev!~uGRnB_V2LpFTumvKTbh?ecK{PQtu`RT{U@ROXhY$_LjW0!23&u903 z$Eo-Gq5k#^)>_Pv9op@L!M{27{KJ{->0f{0so+}WQHOP`n#PXP5wQ`~l4czjj$&`L zFX111IrF2(EgnXwn?sZBr>>hwuo z>m{N!^ev~?n%=vkvhb#^c30BfaKKkRGMDZLxMC4xmLqZq-oqIYO#0}4JsJR zDBLojADelaa5$Uj+nymo)o&^e4=2%m?Z>#l-2do+MG@z?tCtg5Kb{<(_k+sB9H#Gu z{Oa471X|8jQ<1ZendQDoRAyKTRdW={uFEn^cV;|!J*AZ16&yFs-}=dunPIejPA>KT zHOM8rmx4V7ab)eZwgiRPWG{<8`>8 zx4)THXJ2!E0TS?hFTnhxT40dd%FKD-0FQQWByBg35Lxp;=y%jYU$1bI@;He+at$Jn zXStFpSC(j`c%tXmaM(U-H0bRXcCVC;U{F*6msWTJH&+C|7N-fj9wUL5TmzW`^E3ZO zwsk`IXj-7V4#FBkVNXXdCob?64i_nt%BdkRQ>mJpntFkH4elZ%{Y4?wSzsw$+YD@? 
zJL9{yo!-xu1E~mCeB3*R)BR(KPp&>9XJRIT%LKvQ+850&HoeR2nO{wdv;Fx_rR74gR%g`JF0 z##_@Ks}@g36^ol>ZI2vQo=fLk?n$BY!K1M2%?WbgLmc_{^)|7%lutS{j)JIg9(EMC zZ_gG>LXD^gT=jTFd&1X1)>?mfT$e?6cuIqYxHIZ(c{QtKnk}Tv4#p z;pORtsPyg&#*E9sZ5g}qVb5tIf4v6ZC)~x1pI!n(?;YOL7)7m3v(f*{RIX1*cMd)`vo1BbyCnjJ%JDGL<=8F$al1b35 zFBm#rUX}4QJ<_cnw`jJjFcui|<^vNpUGICVyIt^@_ zO%A8DQuE?t)*PWk4+zy%Fe7nEmxz34H!WM9PXk?!(N}Y7E3(%0FnQ%-AQ3c?xY!*l ze>WgaQ+I_hYjXDqGnIw(;=vxqwCEgBAJ!B6v_5q0y-UPiU?KTR@2AxRtEh?g6Y|pT z5A$H!OVX+IfeA9ZO`V1F+(fX$4VuR*7T8A+n@U$}3(qEEBX*ot%1Oh3*9n59LN;3I zmyo3hBwMV;Qdlg4k!yusMBp9zWUY|L?(ZWu?aRo+ofV9h%TGo_(Us=CF(wpOA+c}!x>~PI+O1QGwH)0 zcNyO^dbIWTHAYL@j7tj3BpbX_tb?=-xbz@xQZVg2jXLn2&K)C%KZ3$&v$nt$mrJEn zmmi|7+xp3+wqDYe`hvXd{6d%WPw8wA1r)n11uMG4N#68h)THSrQF-}?R80$}Sx?RB zcC%<|vhfa$sGUN${CdfFj;rJbq(q_n=nY0>#1$^?dnMCZ*hVXT4sp$f;zCVf#pyj8 zM`t^Q(u|90^kQ%y%?n6p?i$vTpdX)@4_)u*1iPxIv>}QW+yb|EMLPl*J8v zwIK`l#~#L)S(ccq9E116FQV~{1DH50cvG8FFn6pz)^9$EDN-kK(Op?|@l`|ZQ?+RB zn}N^m!|2DEQW&-V1bp5)8V^jaqDj3@G|fE}TcVC*<`Nk!l|7D%b-nbxFq;^>n1VZ# z6|v~Fv+&-yhFO^w=uue+_19u)jJ&|^ES-Xt|9F&VD6T#-Om79|)4Tg`pqB8R;S|`NeSJTd~6hEqzB{I z_#FIKGMZKUJ%jygS^{@OszF%K#c7f@7^))7d6+X+-WM)YU71O+F5oO}*3$%Xb1@wH zz6hi%ywKzINqn+57?l$52s_=yc;e|rJe1>tgXs?V?S~dly_$?66OUqQkUqYzsls4- z3?ze9(ErS7yp~xF?G~dk?msJR+}}*=hgY#`qs?(uW&w6Bm8C72=dtywEEvR;&_UTJ z6*hZE!FWbMnx~sd%9f+tvBoE)SmiQ3=cNV1LeF2pP@3Mc{KRZJE`uuz zjNxEr8TU=xi%|}YV747mBySgsgNSx288bKsXE)_C36n0n1Q-X=J;9z*= zD8meGUB!tN3$;wH0jIHgG?S(w3*~A*$=C0vn2bal93Yy^y7O1KiE81bcau9FGuNhr zlOky31`)_qoq&&*+hg2-1?qo4O_xu0!)X->l*`#jJH+JinUy%<#(Gng(V{roxZanYDe@=Vj7s=JtzZ`;PuyVC7sNt^;admx61wSm+=F@-Ms z@I`RI9->i|zE*`j&&rp^UMHBo2sGRr(D}!liX!JWQe-@Zi?ytr^oJat!6Xv?6HQqs`FtXR)=xk9_9lx3&0dtu$Wj>7mov3hr;45^)p0Sp{29(JOPC zxGzz2DD6_m-U=h@qQ0l}-QEl)wOX5Y_*+qpVJCW^+Ksc$3L|D4%jv!k(@Bia0AU7F zNWqc+XA)(shxL@eM*S?a_ls0{yk-p1QdYxAx*2aio=Ieuy(Nn`jt7mz4(@5}TdK2Y z35NDGGtZJHk)*d8bfj?`<9lE###X3Nr2#QWHb^CGphiVuiVJq`TSh#Uc2W1b6}0Ka zdgk@ixpYuMicXJ;qI#98Bzw#w@cUEwgWPcBnqMi#Ui4%-j?g`?#2@qzo9E&bJ z0`W~*pcYU9w?=iqjQXo!W#Nx0Yh$64^Zb|a 
zVfh|->G}X7B?}?1d<>YEEC3Q444Na32;4YBn$5(3{Ox4;dF}x&wNT*dk95Godxbzp zdV}nv`B0v)9VfkU6IiUBAQrZTF}>FTKkht%f18A!vgB6yWm^yRd#=#n(}&^UJmH)2 z^D-O}7!jV0vD}_B!gC{g7p}UUgk1SuprZH~#9}6pf7)GWxU&g0&+oz{*9K6SJ`)ef zJ5o!>9fJF`7L@DvW3*%~h_s!Zqffvrxq6Ve?G8= zRx{nh=ZX2w8hSwK7G3!2A3eD86P3x!CGUm>*Rkr)vLN%a3aznQiB0oHMnZcm(VBbI zTBBTs?BDT&2F-bPX<$5ELl?ey!?f3!LL4>_ZOThs)usv?cdz6c^qb3lK>NZ6q70WUr;fMvRsxOiDCSl?a*@18``f1P&F z|8Whjyrl~>YBTA-yW(g#DjmM$tO3R%7lMx&fX4G+bU9lBQ#`alI&V8&AD@N={5Ysv z{Es|bcnf#E?q>?;@{r>^3TBpBf~$g);HbMsWnZb%M?QW~t!0Mo>h@4*Z!OFfj6lk9 zH~f1&1KMqOp#Qo^SmAVsTPsos8BNi!%~+epn_U+eXsJM*H2`yYc$snr?q*2hD-Ri} zDa=KT-ld`gg&$&6I(IgPI2;8n)0?1V zzZT54J|H{gEb-FHbs)U#A!FAa0L@gWSc7oy?QW*+BEpM<6`=Hf3jKNMCY?F>j*DAc z%UM2~4yj7{B+F(Qvplq$erT70?3;jzCq=RCdnlw6RU$H^MgGY|(ToQrq&0FA*(WKS z@~s4(I_pHrM~k4%x;E?o_cBYrc#(8Pp?9CNj}tM7=Dczq zlI8`1e|mHbUH-6|Ox&YGyC&w*(A1M;?*3%b*dc;(j#^B~X`YCU8BZ-PW{~J&6GDD< zQNstK=r!&&ox1unEnR+(QS+073Piyh-gjiU|jpdX@a3J^NooW=BFbWx1El3=^|AwagG9(#6IVA4R6w+DLc95 zPwhmt?=|ygLoglrBcI&eD+zmq9TgR;G-@)+fs0y|EaZ19s7OI4Wv-agX`5dWnMKpc zwCf@C;Rz}5vHwK`j1A`#A%Y@*IJ&D*mm{@7w5V@9!(p%@KgDxBD(y4f}>}%;LI6Xr^rj|X!2XYkiY7!&sXm==G_hERR;1h zyzY`8sNI>(uiw^z4$nvMt7{GTWxLIZvaKOr>7UN;-mlCL{~XQRiTx9tIGSv#tscKA zX%erl%Ch@kMX(L!5`2f-c)se|9Ja_`mQjmV;^9;mhX2y%YgSF+WenExks=EGjn~uo zAFfka|JZNBxwoG${;R`hH0kntbNle}DM@xk<5*r_*b^RYHk!9MBgvPoT7X(g68v1$ z%V_rzqMW>5D9Je61Y}d-DA;tlN`v ztw(W(wg)vjwuQPFHKEyrKH4--AJ2!ikaA@cy8C`NL^mklIrFnLzUBdJe|7}5KMH(% z!+EGb!3;-zT>#%Utf!h&_EYQAL2O0Q0w_9nhqL>z3#aY>NyaLTqbm<8pil5_Z2Yno zRYZ5y)Sq9EoqqiP)K5LC;(5!S;#8T$7I`xP_>rzvWWmr7wyR z#_E;zIbpJ>Xm$K$ox{h}$tKIc@Rt8Y*fCYpU(T8c?i zp5mUz3@fEY$)HgxnYOb7i(`a5o3=gt8FQYSS8<8D=#)xg>kDFNlkgyMR*SU+Bofb?mWK zI;ec%iM5tQr7%N##_8-l%HErEAI1fJ0wzgtK7O=fbGEL4b5_fs=EzN0zPSzPuq!*e zG6yaWpM+y$o`6nI2Fy;r59=J<`G%gCVDV)ojO`=r4$D07+%Im!skg#jpS1uHQCz#v zLTCz!gLU=7{^f*d@UkDlcQ-GBl*SflpAbfk>$Au)?Ns=g+X;4Qv6xccNUT=fgCzU= zu%3Gig;uWxE}}LoQh1*2Hhc%;K2V$>)F`)>yhB=X6(SdG2b-ITFeL57M*MpVB2)b< z6b8>hzRh09Ptl>tKjUG=y)-CQ>V}`)F))=00X;iA-uU5fR_cmyd(CLWDm@2iUbmvs 
z)U^Yf6W7DJdOKdsX$8)ie;+@6ZiPvXQ`qCd%W$3hLKuk3f#rYJ!A=*JZE$c06WL;z zKT%*zSYLt(weLY@v=5w!Udva-hQq7R+W?P^{lok%8K&39?hxwMlQiS) zUThZ`#aHB&&}Y&kFtBA0*ZX)UwlTstPV*IAcws#nOb)~&Rg&=j=SSMrVNTU;M^Src z2NGB}j=VlM9lso&g0e+DwD`+ksx+(uWAwXe{P)jPMd}o~*lvYMDo3bKT|CZD%O@%2 zCS+N=87kdAhAN%Iv_k1CEwuiFY~5Ul?p%N!WfQ@Ddk{uOOonHlw2|2!f{rhCq06Kj zILUt_N^TItkBl-t+GT`?-<(73>;)*QD1jl{Tj=EILYj5>3(fd;kUJb{gR>THLefz} z&V`HNzYG~-nUscKJQ_jl^B@i_Y$6ZtF2?li34C|BC5|ukhGffaICAH4wAi^5?UqgB z2PfTw{3Z{4*%OA*o`jP;n?&y?BWE3b885nr;nxensA_(nzS?KV`YEYlZb~p>!EtmM zIL6z^thXLKJ&i9iHo+4b0RHb!F}oYrQJQwVd``)Hw0|~0w-pO)lAGU|4Vzcc(W|%M znU5;O{Av{4|8FB0)UTtXE|^mX*<5P!^*xR~Z>8}@dVvUIEkppxcIKs?$MV$Uw6b7>2S{JC! z#NW;4=#eoWly2M=m`1I1gQYL^UG9nNW`2dZ{!O^Rb1hCE7=<;IqgQmf3O_R~&}sD~ zf3C#CSg+oSqpJ;wc1{k>aJmh$_muFT;}$qlF&?so{YVcxNl+2$xq{yxxcI^h8X8l< zEf%;PlOOp(ZG$FsZi}R2i%l{5* zAvk|NX`Zl!`o%7RO^ShZUBDtR_-TyuuXNJEnhY{C;3-{Ye1_3#C?k1ZvP8LPGHuZMDm&9h?b^d;g@^kJCk+<(=2!@?x8 zsa_8K6TCT-f$`Wrn7(Qb897M|Z6r-N zaq^574?m*0m13B8zm>6Th@-6zf=8etj@VECMJ*rwqxOz#NJ{ZLqHH7!wl@w_x1K9x z!2UcfzD76$Lc9zoYcfS zBr|gsoef!}#a-~C9S&YAbdM`GVinG2s>Ij*|o> zqeWmGDvevl^pHCH$MpACS@?I^6ptSiLG}6LQ0>Gp-F0*xcXx>!{bsIAHRackH|{6s zmyHBhKFa5^!&W2P-(YR>R+E&3yOypTzDR?V|1k%*3S7KKi8ST&JUBw!hIJ=k9j$3Cm{#b^^>UH!ZfZc-x1`Nk7Em*BdA!Q zFLXbeS#izG4fNknhW2~oDle~0C&$)kL-)i2^7i5^E`72&dw--mQxoifI`-T7+3Q!9 zKV>ao!(vLb556WT`%d#urM3c!@V?cBN3*zo_l5BNQWQv74 z%2~3Gxtk&fZ{F``#l6dU)dNAW@RbqFO*MqGm1~Il&3&+Tga#bzk_C|+zj(`EZq0s*tnwD47slKnI_uPzfs~Hu78}xh?I*$jt5Me~N^YuOTJ) zPfUWbxvWN>*VKah=Tw^Sm_|NW$$@R6I{j5?D=<)BQS;tVYt~zgz4&fAZjMRh&U6{W zhBqZd)~bP|%n=3Nw2_Y4m_^6lm`*!e1ZKCwI==gx@IDGN0(tc@V4qpWzb+3ZQhV#E z#u;-ed-@?CKY0Sv6_w33s4ukse%_DTmzwccPsfm5JLI_=Zu>YXZ7H(3Pnb1~5rVP_ zL3}*@kJN_R%+3r_gHFd-T%{X?rRt{e&({@eOtXlEiU)6F@QA-Et4F59U*nEfOK{#E z8RSQ@4AuYd1%!pTn#MuLwEP|nd9K7cX5PRQ zy6}(1P61xaf-Tz3khH7~_DWq5cofrUNY)17KFEX;=Tc~X{|QQDGvNBOB1ktp4}OlH z!A~?4KNfVr8JjamU+qAhuPLzTRtTNBWErlwVZxtUlZ+m}wxN|oRe5ei0;t)%gMIN$ 
z^zo&8@L%l*u(YfO<@`kEadJEx7I*`0$zO)UrV{O7{yW=JMRyF{+Gc>@;FA<~aE`Ck%cz){~>!*NLmG5=q&;5nj6~RD^6(X6wCn zkjCGIWcrhSY!c>lMe^n7XxBw_4xXm70|-{VPUMQ7Zy>Ap%oaG8V_^8zW}J0?H+j^) z5Jr|q^L?%fuqEg?NtV{8E8O#`w%#);`n!UJJzWPHi;PHh-U2vRlg5XKMA0JWg`B;w zHM4Kv6p0XD2IFLB!dH zPn_~}JEm@lCr2;lkvE5(q3P*92sLt#B2_+c(n%>+hn(fgP~;NFcQB6~*~=3D)_M zi_zF66x2Pg!-S#j;H|!vZ|^rWssE zZ3oe<+R!^v7sM~Wfj|Q}SR=k1@^+WPQ}wNI+5ZPo?%qzHeNcs`X4@gn$CzKaP!;x+ zKPKlwmcX*UL-3=?6Xt2^!lK^KFzV9-xE51SZ@v{gHtTe8M7SeN`sNRGYZJ<){N#*3 zN1^4U-FWtiBh+^traK<~By|`2iK;PynmkFi@=Q2{MxTIt-Z$Ywxid8U$b#433t{r_ z+2qM*12Ehk25Fvd81b(k5?ZBE`%M_17 z0BzUc=i1%yE?OSbq!QtUwj94E#}U-#r;<9kHJ~r-)x_J6gO=}|Gr7wk z49*SMCC0F+japANr}{kR8Z@uvUW8OLtQ$J=2m}v3TS$VfRyy zUw%CU#E+}9|EV?7x3Mm(8(4~ifo0??Zw!sbL11vcmsEax0#^j*`L<6Q;4{|} zF2)!^c&ZN(&ueAUX+E4;7XqmxBjNNw9!A_YBi5lZ?DrFPjA^UDy-s+6DWCRP5B2UM z-cEZVYQH}}(m0UOpP~YX`aG#>R_JUQLw#C#Sc6(ux8bWnEk;?W33D`Dp<2C>+v)q0 z7#bgE4$n;^*URn_>N~1@QQ3HA^7uEn(RdO_uWKivGe)vw#kEPJz~xOHlgK@_evSTX z^0|Ot(L`nUTavl!MupvcYna|_iPu-zQddh8Zb6_oX;!~MHpN)N_Y5nT`soR$5r-A_{awi=WOdQ7e+ogjZaeQ1DQGO4*b z2^;1;M%$D#L}Hy0_gtrt_z8)z8_8?o@=Xy|xpNI;dSE<_&rZeKg=1k}_zik3Uya#j ze1o$th$1o%qp7h)BG{VoOhNTVV)Z+W>gH%d)XEKP?!BWB7*GYSgCS7j|A@nUEr|1~ zg!1qf(AgIYFGsPotmY;tnAd=hY!cjDbr%#%g5axP9ux?c7r{ z{>8PBdsCcNW*hOPKdysQ+;-T!fxwG~XuzX$*$3yJf|7|fl!@NNCJ{-J*qRAxn|4CF zpATw9U+0`o-Gmf@k>#}nz`dsqjKWk{H@ElTFu;Ikeg?k0eH&75A7hH!Bfu++gU-F~ z;4JX14yCS#>Kp&aMZwK6XKo5ap$#8wqy^(5kHXujwIFA63wI<$tF`i|=`yW5a>u?F1N7(}uZI z8hIt%+9~(aH!ehMZDmnPufgb)= zMRKm5C8Jjfee-nl3eQ7}Dtw)G;_iHNenoi^pEg>IU1@l_!X+;RDqg;W-rY)2b>|eb z5|`6?GS;9~ziM{Jh{*DZIuAgkFp-2Rt`K?-dl{p}BZ#$E3pw}jC(IpVK;BMpp&>DO z_-s=wEb^O49GelR1hQo1t9;fW{v_#L^dH7OizFojBdLwIJ~_Pa2bo#)7)-;LP_BD` zSij>5^Ccb(j8*W$s4tB7vJu>=a#J#Ivkr}ny~a764Wi2v-;m^ZCHiRDN&3jfo!`3k z2}zUNL<0lv@-trf(2D`nnG5Q{*V`8Q(G!47LL12!o7DechtLy&rC6V;UZ03 zHYreVsYLYuX36fqsm;9FsUz&=ClZD6?sU4RE_rEmpT_qG5rF)C0E|EgVMn&{5~RxzEx2sZ64at_g54iSf$(@r~+ zC)3h07!v){hj!SUCD|3yu55;wt9|1#1G#zq%$<6G^?igVJmVzUy=T`-oP 
zbN(~AVkYEDUW5>{-ixqiV}iiq}$I8YKin6`~Eko7wU4w4?SFdjWY*tYrX$NYEdAlbQ<#73Qgy=)XUy_^AG(oRxL<`rL&AVv zJSG!o-VTGl5P{)PkxTaWEQ0md+R2fsM${R{6U&d|!9RE}iP<<06Zsh=PEMcI%bX$P zzIFMxBhQe_Memu_cPD_!=rY0KDUXJp=V9oz9-iAiMCW8Zq^t5=(e&IKpshMs`q_(q z@p(ZPo)1OMUz_>sNy#WZWdvS5tAHZkrlHQ9cyw3I2gL`fbfb|xF3geTb1XOE&&R*9 z?zA%=n=G(smS~amRnjOfeiGl)Se)hkkgC5H+(a2CaFntm_OG7G&hyh`mDsnmV}BCL z1T#oCy7kwP|g72;S

9}7NmN#I%0%U{fvRdqTtD z;gws|d%zbev*zG}a7|YIs~Qw{3Y_caIB>0s2C2emK5ADE$c^=YHl2z539o}}<2)O( zWUM5}eI1M02AbrcMi?|ZNJHiWfH{|sgNf!G5L5C6%dfAi{V@kH-AbodfRBp>&3t2MXLKMKik zYhWk%cV8u6CRp<76D3&}>AjG=G*{s4?jQrv1;lpO7ARK8g$bil$e*smu*P-3+Gx4~ zZ~NjbRX_cc*S0l)sXsPgwYWZ=@_9Q(+;xF5PSLPwzc!6~dlmvNAE1iyE!e7BfR#I+ z;PxYhIDPmXde&XV`%_P%toUhsM8>kd7u!&k?MAWlf6!Hb824SzLdP8rtj!y7exs}m zf8u94K5`e~QzxI~z3#^%dsG1ps*mFZw|dMhyo?9_E5~ga4{)s32E4Q6C^`wx*HI5U z-03y|E#;NiF}Dr#5*md(z+GDZbO5hU>qVzK?=i1dl5YzwhPGN;wkG>1inSHMm2e@*s(AdI@H?HOrZ_Z0w&U<^X|dC7vov)Np0|B(owkj ztdO2we+E~4Nx;pT2{?69F2+}i@cJF+@peN2eiX?;6{qu9@aF|?AMr!T*WW?%vl3U+ zebD+~h}@k&fz8Vs&xZH+;K6ifcx7Ho)zS8jwvDeI$7QyS8lekO{tm zyMp2jUq zqv1OoFeT?YNAmko5VgZhu^{kRM_`9kDoyt22A^BAS*c@hu*_i=%ov_Wyq&9rGjBCi zSO$aj2|eMtT|u=zNO1jAo4ASU(L{V}o4^6q=iml6Qe&1RmU4qx$Ad504t^$Wl zhi&Yh4mu%!NzaFQ@FDFw5kI{dPBgf{zH1)vH0V4T+OieY1`}bHl?t>3DnN?gTVi)X z1_BdQSeuV~;77v^GXD5mUiHdHvaRtJRKzF%bK?O~vaiI9PktnTQGw5PlfZV#NAfXH zU|cRW1&6BDWc`^@vrdc_=S|v#-~D(#csMM_E|qE;k!1%hb0cAEkTN6>+rWR4#gOD- zh!d~*Lf*DR*wr0Ktd=E%`3+U@y%!5t7V5+C>%TxMYNpUz9wu6z^T6kE1gt#xmYS>* zg9U!MCPPRGplP(0<}%8EAUQ^$3Ou_w}kS|#14A-0L^s6Y7vV?+g~>+PgL z=EbzfJRX0Wgz+ZAS#9=Lz^R`O@>>>(q4muT{3xS!m_Fq#vqpUsJ8o$iRn|(P(u>DdY1?pw>w*~EL4Lv8E)Z<3lFhvn$7f) zq$)o!c!m-GS4I5{?VwwCE_^#6)RVKUVCA?zxUr&y=#@7A46*^>i0*&nAi76$JjE)Iw%ILbiOlXw*RB( zyyLn0zc@}-HmQu#LR6x%x$kpTl*)?8O3~1iv`~>vR+*7XkrEnY#C@Nuh0q{MODTQZ zlm_kJ{r$@yp9i1M=Y8MjoY#3iZ<6dl71B`S4G|$;r1y3JjDJVario*=$nOO8D&vL} zLqp!yVJ`bzte!pocs5in=|kTeUF5imwlFJ=1=X#Ayxg8=bS}ihu5CByv(w2aIU@+a zuA7KgFX^+JNFnOo@kdneqIX*iaglxovv%4JZ1`M^zivIFp$}eB`YIADtu&xhCL0w! 
zXCf_W5ar$O_}uEu7}`$Y(GOzXQ7&D3%PD_ z6uU)JAgw#SskCl z*9*PAq+i!*w{;e6dSilJLC&;6$fn5*72~=+x!7O-k1kad7_?V}yZxGHBR$UnyARdUxQ6#+;8r1>cKtQC zm*|@|lzAbxH-vlOGxX zawOl_6GLu}w4_O2n@QKHbh`W71ZtFSFFd>7kYNv=DAIIdUDZrf!+ubM!&E=+lFJ_PjZqeBf04X2K=^(BVl}7HlO`*8|jJC=NBnVC&l~y zN$$zfTAoQoT)TEMn2fPSxt%+#Y!8VyJlOXIQYZJ*l#AzR)|6DZuv`SUs|H}vld1Lo zdBTkU^%GhlItHedX_B8?r{jj6YjD@dkcK>KA^2w&7HX}*#|yUMimfS_*r~;p`6bi8 zPgbLOLm~Y#w*`l-duc#SEMw(63McnWz`|cvG_&9}?6RxHubWrHA)g9ZlXIG`{XCLB zTeJ@o$BD4U^Ie$(FDl^jjzwhbpd(R^dIg!Ug`Kh10`AbAOY}{&IR?l0n+dTm4UF+ zcI3;I3y|zH2K5U%!BpU)uWI;3yPBWkcIRx^<|PMzHFCkq_#$R$Btn6=@Z6sg4Qu{8 zL$r@U7s|ISasaWph0YRDz*n`;?kQ1ap z{@fqHF;9zNOy_lh^D+)#p%`30JcqnjTn!!rN65hiXZZN9b`ZOF9&8)A2m<=_G33I0 z@=SPbKKLkH$*3SAjtTJSTPmtn{WQ`sH~fx zv2GJw?-m!_DKU_B@f?)=7TA|z49J+fz=nPgsJ^ZaL+R7NV~H~=<0|$;P8D@p8$+!+ zcY=E!@G3T=*@?4f@Rmt$dzzF&9YN(sWjQ zh7mO9m_hMi4rpi}g_q*S@Z`m5c(3#swKFHfv|mYZH_u$)n#=R|W6f!HNhoVla~O0l z35pc$KKelK2s_5f5B7*$;(cE|Cj;|z;k<=A=qaBQcBXPjq}U{!`7#OZzt(fFw;du1 z4(sUbPd}+yL84XD`fSQtvpDHY4^7yRXEk}jC|oQuf-RAmf~{}bxuzqJ;dQ_)I^~`Y z`|;U9GFQKjO;ncv<4r1 zO}hlne}H)5qzNL8TAO z8YL5Y@F(y`0!slbx6+UGpJ}{0gHMwssq1?SoIl5byRtl(XI@U<|+CO^ogzeb&lvNe|7Lahoo`T8~A$BXFt3H+tExmH%TF zAP_qC z?~gFw;x%3kw!n{@W8ips9NNy-USzAF3p-DfK2E{*Z>hWO8Y4CPjeG`PNfPxfZ^ z(t&HG+@sqJRqeY@E4#MR67F}cPs^8b!EMEzBzuB^J84D1(R|6jcLuu zw?K}_GHxpfZmb^P&{8p$T{!*#*_S%R7fRiu>vnGhxmg0M@W>4Qu6#B<_4WvL{?o|F zEK|hVqDqoCbE}p5c!6WCs0mSVZv2H6dStt2nZS8mLtp5=Cfi;<0+ktNpG_I^4 z`LLV@{2IeMooyxgYHO+e>IiCZ^(|+ST1v&<3cDf>er$`<2tK*|8`m0jl0^K~WKF`& z`J+k;(J*y4t8~v7+TAy>=Do*p-;ZSUQ&~nnA2cBue($X|%LUMhjqz;85nylsEw4BE zCkKto)2VIkZ&FrK#eCD4z^;FIoqk-JMC%71^F}#g@cl#ybSOT+%EW#v7m~`qGi)Tw z6P~cSD+cMo78yQ-Tw#1qEu}|{7%Z(CkGK2&()@uW?Dd<^%C1VKe%6VYXHh{PXMCrI z&2|kBm$3NfDB7W@xoZ;ES8-G?c8p#l1-P!_!h(5b~~twszX^yW~6R`oYZ& z8B52q(gzH1Zl^6BT&Ia<{Rc5fVKhJMswgg~5qf#vhOFVmy+owxFa=jVw0N38?h09c z&D=2BvPI|xnETLyH<9RQW&s~=&2Fe&_LX)PS@7{io3XNR4R2TFf}d7Drx8M5OMcXO zzR^+@1NjE>=944JX?S2nPza7X{+*`fgrL5OA%5%;{-(?^+-LjWoW+X<7RJ7W^4=&! 
zdq3P1y`9~D4e_i+I&Zzal~2sg!#{<-=yn{Squ8v$Fg%?|NAAb_hm$e1^)&bDVlDst zR}t&=MI3+SeV}^{EU5eseT*=gjH%JZ?5M(a>iX_G4f?zlF7?e|MK2pSz&PWE3r^?h z3p8MZHkL55{X6-M`_6Ii4PwB~t&(|KaEEq>+wcxo6UYLoT&5w)h5T{&%#TqnXQVes zfwR*l_+oV?@>UnVZ5quR?A}h) zbB?jeXIe-|$Sr>Ks?)q;Ni?~YcAdB`*hjcoV(=kfp1pQc4n*g+kpA8?c!d#R?_Kc) z_iasN^|TYvXnKM;Ojt=WW_=;g3**UgftM4--r=+F>Na#&E0H~O?O3H~O(?r=z{|UD z z*kF-h)?;lft3GKd`>kynS)V_j*rh!O-{m74V$HnS__|?YNKN=-20DCGh8&w^hmf)< zm-uJ0%*t*vGVsudulweT`TL`|y)Sggy@wK{v~2=i*gOd~opyn+bJJn&?nlI`PoBQF z`NsF2nPaW-REmG*xftX}`9V@zDlO}=gWI};%mknF{3FRXbjH)~$ z3(Kp>?&dW6zA}#uR74G+_6|Lo!$_l5l*C*R*|OWv2=(&B#mCanBH)N$~@al z=4ED5`$ePj}C~QfQNB4FuWw+YWV0x zP&S%Cr2N0~vJQfS?zaqHl2U=o@4hmkPY==OmLJH>2{VbIUkCA6k+52kzj1>pKURxUGS2r`+m8+;)m!dF-b)Yj33w3kII0%1n zi7GjTL376eIAhvG&DWXG#_g^ocfK*6ltKQIT|PC|b|>rB)G^aF2WTyGmT@*`VA+&3 z^6}n1;x}?O{HWAtFV5M8PQuf3v%zid$)X*sw!#QJG5;0nOTVXy2b>!urhlTzAMTS$ zhod0ncmgi>T!j9wTIsqwq7Ct3&D8En67Cf8_rtA@_%+L%1-}%``D2dLH4kI3+ER3{ z&%uK`g6LHDDEfB90h;k8iC0oyh@bB-hB=~uH}}XBw}}Vg?YmW2Ri}-nPTTQ)&nLQ8 z-$!65w^I z4V&fQjJg#|;n1TTbclR}fs>ZuFAYll`ke68t=W8EYAt=zaFKjhlg8#x_XQ_Y9q6xf z!k15VaB09&lsjETRf^mM7Md2^DLlsqIwnxlr`H93%pCk?=R(}&ucPOgNNz#u7wleX z59hrl_~PObyxq5P?8rYJQ1fXMvdPDA)}sW}@fDcbTn&Z|D+n`yskmwGU8*5Jg<9GE zp~stdBfJVlmwk_DmUaaW-C4j_-g5)(;XYV1Z2|7@k7WB>)|1hy$DqUiFKtlG#pgZ` zIOQ3c_`M>Ve=hjA_pR{5^-8YTq|gcM^Obapt^~#>E8vEDZTxN8gVIYfaQc@*yjh-y z@&W_xP1p$R@J?cW$4^C3@pPKx&BEmKQYgFR8I3dvM%nryZ0>7^$mp9i_~LP_J=jgn zlHSn1D@SSR>t&d=ND3u?`r))c)6l*7Fh;FS!vC(>!t?$1)PB7Io{)FKvBR=xZAmHI z>%uwPHWSNrVt8v>o8SPo!Cmcsth9B7A5c^A#Tn~vEPQ`lV*7Wh_A znpOQ8#aL^+K-Y=C@!{sHvvK#nk z_y7)?p2WQg&HU!2*{I(XhIZlh>{*9n{1~S?Ot;X*QB$sgqkI-A$vcAXGf&~*>nZ43 zlZwf|&hwKUEs6HV5vZL10p{L!Mv=|u;MFI@4M(!@$UH6lceIHAH)awhdp{);BV+N* zfqIlle*;rnQn0XK4_=y6fDyr~Fg9NsCw1zwS|1Xqk?&Dh_3jz?EG@_WNy<>Y=Lskz zdO<~jkhK^RVZ-aR8hrl<9CiCDen#hKNa&2kIpb=O-JFP>&R_Y^XR=m$v2^5>PfW2mW z2?O_g(z~B~nO5Hnd^()TwnW>5RU!itmYQJsqY1>sRbZ#HHP~FaLgM4||L;d-n1zKz z)JGY{Z+7Q8r53|FYY|9Xx|0s%tHF%m2TV<@6YTW2B+FYOSOw{F;XEA%pOu^OZKy8k 
z9K#Xm%$e{p&lxt;{cv~6L!#TBPVSYNG`MNBQL}4X;LedLkhy3-z4YJ*>3-@Af95X- zw}?TZFDV8SFYTvs%WWDQZZ8q`sMau-eHVjeiae}c=?_&qZj=6d6NOo24oT(P--VyKJA1Jk|u9Pu7=dBli=8@X^_DE5b~jusJ)sx$jp3> zM-!BIQ|mVJ?UW1~I<|_2ZEAq%*ZP4p`C7=8@};HQ|B{b4wW0d`XRt0fL_Opb!Kc6mZrv^tW&%7ZZ`Q{b{IBIoP;NWUehi8C+PR5_Ne(qhx6Pw8kH2sQ)!EN z7`|){nJeszyjV60Gydx1pD$1A50tOKPxnRHgBB6==_5xb==TNQ+S>!?e|N#PT5q^N z{;%1`J}1FfV;a4mq--_e$0E8^7MWkcMX>mZGA902V%BJHBpMHsQGVfZ#?B=Q!_F@w zmuh?|*vz0w+GdJGba2@g2PgZpqw!i(j4ZXO@lb} z7@j4!Xcm!aJOXvU{5jQ~esH<<47vZUft=|rhIbG2KyriNM}3$9_d`WN%TF3#mBhnK zivXhk`y^}<2by>EFZb)gTQajZ7F;5Vpj2S&#APM`7j>K@%(8&uyEb5}eGpVvS-{Tu zC7`qE19cSkwM!pPg~;v6kiA<3_G~RD6F#NDQ+qkMzWx)DmR*5qH=|)ZnBdHEJ8&9& zO+Iw2hs2wM#PZl$2;JC4KeWX|V`C9asB_@YOxJ?CYhOwC<5<|NCD$hDByBWgTW&6R+7l|0UqydWMc$Y*nCO2+QLDvdN}Mm?hU_teRy$Ma~54k zv+iZ#Fe^R{M#fy?V-~nl^9SeA@$yNsTS=MMj*^2&r(#G+T*=5^vS8K^&wz`m6Y1M* zJ@#=|7G2z1PP*1zp?0_Q=mWUKasK7pWA}A*!saZxw&xmUE);XwQr!@iS4(etnJ_Vy z;>Q^Y~ zKPh;d+)mPE|BjKiL&u5R)(moPrxO`3F%r6mw79No84NY~Ogn|XTIJb;wBolOsXV%c zj%qf5M1N5*Kbis8ylNqAzX3e|lt4P3TVmU{6sj6{i99KIL3iwVODsQo;^h)acGXN> zy!IxWUf9o)m(c~ZVvz)+xFi~re`L^(v+|&rs|u6eXu+ZW82Y=YkVt=GaQuea`o*yl zkh`doR#%9j@x&#>B;Ydtesu$NT(ccLjP4WPeVsV6>;YYHbS)%JzfDI?siwA()wIEX zH4|JynW@iSlPmG(;Z4^#;A-{Y?AjO*`4IyJX(72%IEOD0y-lL~1Q)oF56E>dBZYrgk;L?u)VS@p z#nF~p^7W4vK2O{M!~XqbMCMoGIBy{Y-S=gl?9laxl@RjeJfq zgsY|lCJh6`Zn_B66}FqV9; zB_JVWpm#3mVkT*Cf|b&Vbl(Y2P<=jvBnf=^uk(W-RsAm3C+60dHHM;*D5klgV1Ycr0)Ik4W0>gTmX+jNiG%Fj=_!epJwfo;xC>bNnL6b6Ew8OIE<#j(TRx z*XN+QcLZ2(hyhHsrY^yD=&fgtqw~XY;sFP^qw|~V`C3Ype9YlcnIC*z+5?y3dTF(1 zDZ;0ORWM5T7N~=o-SzU0eds-;0El8cF`;@D<{5ARfk_Rsi##oBf%86){$n9zTDU*mUsGM*(&PlYUwkDdVlp76{+|3B z>w-HDO0l~xS^?wW2rBy@k%JAIxG(hwGclovq`JgHfp`Wc-0vY%9~>s3H!6sKpa}H) z$fK6OGMsO)0@XNY`1V#Dv_JKd!;WsWLde+U?h)mM?Pc;+QusW0<&g$>C}fb!LBhR; z&bcB3*wI1McdM}lQ>GF%xeJ`Z%@U^S&PwLYTpMzzYzo@`UIybe=fQ}KL=w1Il?;qI zN0OG#h5HkJkez9&;BxQ*`KXx1$18u~wpR4O^r~Xy(uPQSOekwNW(#*N+>1w#t*~cp z1C`m}L)R}!V!|K)M0R%~OniERgf9<_z(gu!#O z8zk?P3k3e>0aMpW5jn5#a6T~-M%XTZ>f7Gz5&bM8l55Gou8x7}(|7X&&6nZSY(uzI 
z8v$)=juRc@A<}o#gWp(~Nh*H!lllLBhoSZonDr@ud~lgTBulF;qaw;Ex8XCH|2PM{ zZQn7mL+S9Zct88EB?qe)nL^T=gI0I^Qoy9)FS9Z}5zBmeCGM?`CK2N5H9)jSYH{^)@6SBQf8|K;O5=m(pepibQdu)op%jmX+I~7j2 zcU3I6@|pu$9?!4;xqUNU5c>5_1u76?HHoZ$^PR3<=>lW+j3s~HJ|~xT-O;orkv-LY zl^n7>3_)|U;hm2TIao6XygM`D{@zIN{1pvb+*%-f*qZe_Da#nT+i*8CjfH-VIXh{@ zWv*XKsi9tbFQk1>1p5?eaGrkvevEPC# zBiWd_{*1m|n80NJ4lB(}*y2cYHmpa7ow+=P4{^|EccJw9<&3s3*%- zh5g22Gg-V9HlD!kptW?pjZG}h+8S+sv_!S=44j!p@(tmAnz zHc`iv9lK;KH0p}5*7wv&`9v$W&&QJWe$c~vOgCl+X3k;9y&l0H26J}m*3a-E>ojWY zm222NXD*v}?J3^9YR*Pqac3Q#%QhG{edI&O&Y?#W22o=BRrdYOGthdem0xHgaLv>v zvLDCIVO{P`VwDFE@Kg7SuxriL*-r8n^arP~wHi9?)p~ih?e9H^{O==#{in@}=5ONn zxT#q`UcQ*u$~Z}*{Uq3}&+kyr`6Ix8Fb9KEXR%c_{R%uZ2pk<16wKCWe&H+^KsGkjr3#I9U?R91kv!c;z}ZZ6N#^Xa0jOd zBzOZkW|F|h_H1V+lYi7~(;!*&q?G@w{RH;}Mw6m9Gs%PECnV?JQL99;PZ(hQlZ?C? zPfYYIP?Ar?)CrwLQxEKsQ6o>b+sW7al$#;G{!A)Af zisaR?V4TSE(HU~IH&B}i7&VGfRExqx5ytSmF_WZjW{6nBGp;1*GxD1jz>ybE$ghPj z>+GV{VA`B6;+huBZS(zyTkBTv0Xsi)C+Si+J>nYEIa`u{{!bR=%=+N;f6DaRju0%^ zT8n`;sgTy)W;tshnN%!oBpVAN$lk&C^$qXpAbnUKJXYC5>uLv>5g15}#t3)CEFL;q zMnSHDI&tz6u#Lf^`R>(IS?>kjv}f%~V&459s?{zOG6^3p1_!vnY zEzC&a9}lua*AcJxZo~%;bD_JXMCh$K(M3~d8`_rEe94^U6wrevQaO z|B68n)l@?x4>K5&bjF)PHX&!-82qnM0*!s<MILg z6-A(!ID$Vvw2jM5xzGHqm<4S9lcyZg|T?P6x#TLlWXH|PqT$^4{dG3L~jF(@$R@!{HA z7|ajQyoaM`(5=O^;K(F2-cv&7j8Mf3LXBS+dcex5Xg&_g*D=Y4-pJV7f?>xA+C89- zqpbi>G>*m-qyNw`>Ej`-I)hG9GZehG8R(MS#Fcm_*1ClClhN^E zgZ4gbiQgSMWl0{C^Ol%&MIG*I9jCWDTw#IL2=tYd$A>zLF+2Pl`1=&0xZ75mlI)4j zj3f6a>^8IA^CunR&moR3rMd3!=$4`$@@#e=RtvqIuNIToG_h8?$uJ2PwA?0Q@E4zC zzNHJkw!(UTJ+VG84R43F!mh7bAeq?+4f*M?*fb7e_vgUme^qd(@*YgAu7Z;~i+Iz5 z&EVW{6HLe5gf{jcdH=!$Oizki8*SbKUSd7acT|$x7=8`gKpzyY)I-d@Ag<9dlYMnB z0zL>F)^`TFa9Z^wT-+>-@_G}YU0c{ex1PwJ-7|uhK2rv&-FHCD<^ry_Z6>3OuEG|V zad1Oh70%9`2clI!_-^$b{KLa%KzXeku1QY^-Sgcr{q6zyx8f1_M@)ekvWcwG*H@r3 zB2(bW6~ml&O%UFbhNt5zh1Wgl;8xei}QF)DI_0m3d6C122uW zY|L?YJZ2;f*PiOaJ_kT5d&Hm3*qx%A*73{fT2t{zwT8$zft!DDl|S`}}{@ z-NwU{t^_t_S36-*5k{@HMF8%mUby9W4G=>y8rzep$DGJ 
z1dA9_@nAhX)iMr~`a`Lr;(dDL$4zeYm)X#ImSB>s3_fk~U9>e34(0lkF;ilJfp^uet^#lo_Eo%W!bqIKITc+R9Sq2a#{@r<2tN;frMvUAIFWOS@VayWro% z;Q2UA>FcK+&V7^)+@M9-9he;;$4#4I%sE9*#G+6K`h0%`B?J4pgWAdX{KEryoEHbz zH?HBj>IiS}W(t=7R%e5)xW3yS*CyEEy&Dto(ilk~ zk+;F+zs;yVeJ8DW@QhwOsE<+`X3?IJm*MrH0E7u&xtf@uQ5&SbA$cL4pt$WQuNHtv8?#U7j(#AHXU*j&hfe-5=lZ&Wl+iG4{uiv@1?lKnXFc><1n&iB6z5Uvb7!td6YfR~+j z@CRada1)ILMxNO;$dA`T-9@YM8ai@4%xl`JbB2z&$D)GCTFlGK#}yUcywYtooRa2_ zGMV~d5B~Jj1#{HS3C8U%x2g0YWo+{h?iUljFzM=xvH1D~ViA`{|C~PpSAk!aD&$&2qS&RvK6XgpN6chJSnui{%;8O7Qch|-CgHNOpXd;320mhWW{C&QNLax~ zK7Rcm*>&(VR~B^FYFk_!cVHP04^`s`sdR)zBlp3b`gM@`JJ0I05bU~eRv8W#)RC-A zb>8$+7a6)B!yZW44CX`US(P6pFbCH`Ok*~2?U+IA6>UL0r4LqTI1p{c`CONaGiWTb zr((}7n327+;IHRY*jv4v>ZEp);x+GlEhhJ05;vL=gIkN;`H4R&n1(K6&TijWrrv*cedz0SlJ;>j{JH8V@NQCwvL~_3 zT(Y`;Q&0y@yQG2>)|6w3oj0_oDZ-7ZJ_Ia2!n8Z}@cz~*l6bHXF8bEc-~AE|?|hPI z-Doj*;h+JB2IYih#dBoeniOyzJqm^;CxK1!FbQH6$-qlv=u~|vxSZOE&R7>F_LC#Y z417SBm}wL3{P7_5Splw(K1Qr}9%HZV67sVDdP&NXOw#dTKXm^!A!qz6$)4@feB!AN z;(OSGv=lw3XMzRC=#0lC|Lk`%KC+0S^F?~0RFr+c@lE~H$6fTOxgyy$+mf9B9!ng> zF4DF`soZQ?72;+Sz>e%tC;3zQs8OyqCt51QUVY($GfW%E+511~+=z18CHaVqtD`ti z;S|9}b$Xw*$1{=p$TZ1E%+iV+k`WmS<_f*slHa??mVG%^0r|G{LP;WhuXmP@^wEay z${S=`%QCpy_k$KG+QX05cVw~uMBmL&gf(P6oIP`r zY_PN>)ulpSS-zKierQZ?%ol;Y?49^LE`?;qj)p6Q(u4p>JTFoNyB_)y-^u^cuX?he zGx;EY#g};~(16ls--@{g&Q`&L8lOj;<(PZ01J20@L+C zlq_6+oH+K%LdS|6UR*?m7M6?z`FrPh<;^;?k^-AG%-q>)i54RXY1h7y zn;TYIz1P3SuAd}n?Rr-N`lBcEPRoag$CUdJyrhnZr*)I<+Ir;TiHWFGZb*;++fUht z<}m8d7vdi-+VJL388wDtI`yhDD6K0YH;y+D3CX``FR&tJcPEiuWy^5&qG@pKT{32V zp2ja-Uwdp3I_62YkqUrFKY4F1;4_4NE7FhIbL+DN|Y}}bfMrJFpJ8!4b z(Mu)K+-)*cJ!?lPRs_XOHQ>{&Cb-$Y2@lOs!VbCvTeh9!-5&2mnX&6=&e9mHt_a5* zmlq@{dNfKb_ra*u+1M`@L#5_h(Z13Vc=X$Qx^d4iEeU;v2BBlH;+-S^Hgb~Sra8yD zxDLTWR+?_i1j;suV@;tV-pCeu=jOF^mEsHfxNAKuZh1vyO~ugGwuat(dj$6Dnp!=) zt3+>}TaA@Y()8Pp1pI9?gm2cFvO~vK;RX3xkTVdY2B9_lZ_{Xe@^L1PJ)=q&ru={b zBNf`Bx|1GamvS{rd$>1sZmj&H7~JY`ol30@!<+->sL_)Z_$6qNCa65Yc|osXjMWKn z?oz~%yeqsEE7LHs_ZE!nKg#!x;^EF6J=lIc3;r!rz{aR_D36?vF7g}Dp!EyYwhZ6{ 
z#;dZ|R?kJ%*%3Ia#gWc8+JOP87TEo!0}}3fpx&iSj9Gb)3mN$nB!0%S;RmjxwBnU_ea`rWi_+ZLmJ;*G_mwc@28USUg)NMfGEG%$Ctb<5|tiDA^!bvHd{#b|^>LiiGCmFQXY7*Md9u^!N22>~S7q$8)(Xe^EBE4vl z41zZTbxxSD9yX$^_usMT8aRox55A&Y%4TN$+XnLD&n|kl;~+WJRf$DC%h|XZ1611B z##G2$1`olD!6K4@3+;68JY!sY@Ev{Z=h#p&bthjbTS-5Bok)HbZK8W@&r<&@-b8Hi z1rj97(VLgQGm0W*<=IgX-7~|_X0XFQG;Fc*ATyp zpJpvO`q|dSoAK*830ROjK%4yB1+G&U)BQ}?0fG{eH|G>}ws&aAR?i{}+VyDgcTKM3 zb1EEDDyIr_$8+~r?5Cr%3{ix2AQx)3~y+A`MB-JlW;iW#z3Y z#ea!*MJwSB^>XYO+!DSH3Pz@LJ?f$Kp4v>bldh!27OJ@G)+IhmPn6BMD6m@>tf0AR zY4rLY5%$N<6Wlhx5!C3h6nPg=!EXuwObgt*X=wT>X5IR4^xTF)dVBwFDkJU1%dkhd$P+M)5z$S>*RjMCN{p~JFVQ?zz2Sw!=AE=q9HNU_|3C5 zXz!s)-YNS%UGhMO(w4JiPjMY74lv*FLLWvA%;HCfQY74D0R9`E_=3-lzv@kt6fZVN2ox8u_tsZ*uW@( zSNP_Zkf{%gg(r$f;r<#+xO^}NCgm*S{YM!>RkI*lv;^O?y9dmCxfM3& zL_vB)lshl4on+#nb<2(jG}@(%k?S54_-p(KLA8Bg(J! z6=&x!vxVw|GqBt&861)n@zOCpP&dzlg??+Hv*Z_y-A}+;#-0f_j{s?B4d97AA1&<9 z@2Z;!hks2V>tq;c3r&OBmeuugg$%oQvkJ_O8Rpu`VseS=$!`#U`9q4y*ghD2To=~%SQ%hs9E9J{X*8dIu3Fog&N2~12uOV5NQbp!}sLW zSJTOKL4`Wqo8Zlq$LQhxkoQdfTqFEgGZRGx|H<>&52-=UC(gLfg1W~Gv!#c&)Oe9K zoNV!e)jRYUE#nwSexZqDUYsP$8-j?k=zRPl^i5s|ti%g}%kbFGcjO9xt3GS~Gfrmt zS}gM^rmIhWXVe#&5*=NFITMXh;YmkFC{UkB zo*sYJ3F|&hfZ)zd;`m)0bc7xFg#v@p>Dx&3vf991Z5CzP=LBP_%wMa(;Zrm^R|5A1 zxT3VvuKG8U2T;m!nbpt53i!LHoD7|apczezsjOEG`P^JXpUtqs&*!hwS-+FuVo^I4 zFAm_+Ci;;BU)<{>zQ2c=;T@djn9=ZhogAI_C=ouqldv-P(14VzY|ebcOj718gU9OC zVQgGZKtA0_4p4YX3Hv2jU7!IUrzyWl;J9b`7tv_nF|RMgE2vFuuaYx zN}UI-w&a?@3Y%gs>xu2Co<30?kBCjVsIk9iG*8^ zfUL=*!AJi$$#7E!zc7wDBlCxN+&E1BT0bQxXO9APn+}*|k5sp)68yBI!Trf;a&glO za$HV?y;yP>8lS#`JmGVhuha_JD~j;(;8S#o41s?o@yub#t<2vae@U08GSu#_z^7xg z$(FW_q-=?Oy?_67nCg*Fj>ew@!v;IB>K+9N@uKX5H?lBuS0?@OQUvGj_5{0U(V*e< zjZuvI4JjAx>ekr`z6hl(QoFUB`#UX+gbZ{OUtyp8&@?rg9hHuEx^!XogEgdN<1V`3 zzW`#^{Q|~$j|Rj0?c~GF*F-%+hfG{-M#>J*(F!sBMLL*U8`Fq=Ld z7G`9{MD?RQ{azJ9UP2@Z(GDei+7w*7cr$PG>oD1pyMS}C4yRXtG*H(bAcbqz(0fNs zVK`KTA6Xy^Ww%aod!}g6qaTpm(%%P1{PIz@0Y9M<<&UqF;ZH zhTRCGe)~d58sAT5TT8I&1J+>IJOf8v(8|tBg)@ z@GuOk 
zCR&C#^Q925WatZ8_2F_zp>6@2CWQ}B^#TL;L6bJAOsfUoQjKwl`MdXD7kEv1N4DDp%54?f|QfMDYqI zSCcj2Dwr_p0eRToCpd-_p!}OCi3`{Y+0L%ee?=b}i>qi!mn*nSvrP4sV7xtN4}`s( z3K8krAXU7EYdaFnwdRe2H@`Hkj#XbIXD*x}^c})iYX`nvArO}B(1snKBZ;$H8tt{P zff^eFh)FdE@x!{bAVwK1MHHyNQUj;Ad<_w^n24Zpo4IIYMU4F?3+!JHm^XbL$VVLm zm-EXoIo6%pCCvh-^BN$3!Vlkny@Hor3}L~?VziZ$0Zo5l?sU)t7}?$Q*u!*UvdtJg z!%x;v5>+I>hc>`&=RAyj$S`xPqe(^JCoXrv2FCGd4rA{hg_(N-!0zZ9@*tAJ&hPOs z`|3BK=lWZtxZwm7BuwFX*?3q|eE@C*1%v*m?=eT@ zqYX`{aY2%=DT{%~sX_*FVK($DoAS9G`E1qNO#a-0F;p_sl{PBK817f-~0OrM0V}ZlRntwg!OrIRcE}n@LE^7I;vO@T4z|*IyG2QMTsb%qv5O zO(q_<+KY2u>;~_`A(F9an1r3HwURvM$=+RD0F44uCw*BSIj=DlrffE3@4B9_I(1pd z|6Dhtu5%mfBO`MeF{4&e;@{6CzPm~s0`1YPb`r$SumZ^t6|%jNS-bu~#!J`WyoJTIf8L&`0W z5Q)ICP-7QkIk;C1uF9s9jTMQU&(~&%$=4(z+Pk{=O1wQpBLDTB;eEw)aORVyJlK9ICXr82Jylc7~64Pt0Nzfs8T$0&cXlG&)k z7pk>Ee&f&jM{Oz)ZXiXjhE|c({ljp;HVZATU*fLC#WDtWYU*=NjACZW-C|bIUQ+Dq z4OjjUt6jvMnNu?!tgGgtvL*+LlbxXbc3JZcCZPvCsZ*C6IPLf9xa6H5g}0n(O}nLLdzexg2gXd{S2D>R%X(g4F#xhcxAB1^^hgvfZSfO8`=~#6r_~mI zDLB)b#K#s{?z#MV-UZ~+z3Q&rGzAl zdF+aI^0Vq38MqS3NuF-yCW|P-)?^#Jswxk5QN2vT#ZgeXpny&IP>t!{xkyxyjG3T7 zR7`fWhKhGc+Dm!X{eV1vU$c+4a>)Y2%|z%c9A!ht9mHQMrNpvp4x_x}CUe47$Y#$^ zBMH_xBosFmA5+b$Y6$SF3c2aJIFr3vg$Z#(*8t(Z(b(JQ@=pCsE({% zS4!xvU&P|`Lt=HLpJ-h>LXt~Uh|zU7BDQ~!@D+xTAT4S7Cg-`C5C|nBmL}}%ecfc5 zR2C@>=_GeY1IW9J93NJ^f<*V7Cxt)mlH!HY#Bs}ddheAslBilij6+SK&F>IX^S*;5 z933H(tupkh@dbG2WD@zZ`~xvr#FD(+B2wok2E7Yf*}GpKl8HhYn*ZYnc{vyi?{!K@ zNLn)yb((aL2I2=D2{7Kev9sVL0Mf%&_LQ=5r1@U{v5LJ;nBJ?hjcnWCJ z6COtpUABiB|MMC)ZNEVhYBQm8-e1u4j3VXgnJBWP5{4%`$eqR;c(?ug+S;&c*Ia5=eIOd=OfW!0Odw^rrP7 z=gr& z*=={0WeeX^Q>UAW(;gK#*L5MyHU`VJ z8-vwsUz&e2AN+o421^6pp(kd|#Na=EnDJi`$Ao=cfZh)j4)ZY(hbm9}F$5BIe?PikK&N8mimyu4D zG)OAY<|76bl;)dl#98zvWW+2q?^HHoJ_NTQlaLGO@~9jW`jE?a|LaEILQSyKL@}^J zJL$Tev(e^ESMcdwz;)K=)3=8PsrOC4DVb+uOof;u`Q&g6ou2&>J>1Hm_X5{YwImO0 zMmZOy0oOAhx{8#4i6FJ<`IKGT98wQ+=(xLIaFVDFjBWO%v^myEf%O8G%?shJlp>~LcTnPwPIBmH7uo8hi3P3j-X$Qo>2))RzP}hA1(Io7A3ltdu}+NfaFUD(c}?5`gB%4vbOdD 
z-a8dAcoG3=zx_c|sgO+DGmfMWxq+LT3%R#01onxYVfO|cMVF=A*fNd0$@ysGvI$#1X2OhE?<~|@@6^mh>VLO>{-v=S>S7Y$mhcbs>EUtsTqTTnKWFnaKEFZwJ0GIj z4RetEzhT7pSA^E_*N1s_XCc1ZmYZWEknxd$@W!>odR_>tx_m$FY7bc5(Fy|A9pfE6 zF3YouTTSG?J)%Xu+)(3;TeNtKl?7X$MhA}?61R}~C`M5e=5#uOVW9+l^-(&!YK#Dv z2xT~n_Z~H%? zPux4!yq(o}N-dnRi7_WFA?NUM<|-7*t{^LRE+S`qrI;plPa=?i1$!^ zXnk7%S*PDr8~$ZG(J5O;23+gdz#mmS5Bu%JNbe{(uCBu0Cbp96;j2i=v4L8p!6nSx zCK_ihQ9^Srx8W!IbjZo2AMmgFN+eBMjzs%!Ahm~^nK=Cg$8^# z`WE861>4yjf|0~uPlA{>@@-a+A1d}Ea&oak^Qb5!?W z*@tPw;GG*Dl)j3m*2jZx%r&h2EdvcJzhz#YB z<}KgQWg9y@u)GjX1=zraw;^POnLg+RY#|e&No>)9I9BUe88kPzk~LMWNIKe<9Dm1o z304=9$hZ5*d1@vR;Y}w?K$0F>RX}3BB}iDnC~h)v!eW8K+_k93-i)%xTlhoB!>@+y z=<-%FZfyiz){|7=6JxS6@gkczB0~LGR!VC23z5?42brMYM|k-CMzT8X4cV7>7fm)R zkiZSkaE-Sz?@4A5T9{mh=BP#D&>aLV&6PkKIX}h$M**z!;WJ{mj)3*D8N@cklFrM_ zL+&I4yQ#m2?_sStyW=}`{#FMiYkk2yNSA@iOIOg}yKrKkv%g6$U|cXR=UEGRD0cG)S{}GZ1r!r?TJRbG7$Qy0VFbp*uc!2tcQyi@)>W$ijNfVJFU~^2V`pLt`lczvFvkr zlgJa4@@pIS9bQ3Sj8CGO^B-TiV8-heWlnv!b6)Vt?Iks{4KkjOrCb>3tjE_3j1(?VIf5y#esa>m0aO%>=(O zMd;sGNsc@`h}Qf{h0N1hoKsc_Odq+xvGcZ&^K>3OjmUJlO61Pl zAILZAB`R8Qk@Ks|!^=5Y;Gp9OX>Yhq+Oa4~8qBpgju{NZONP7?A#ncwgnGa+czN92m4JKC>z>Kw~FlyP2Ru|>L_K%)KMx`H3#%e&>YcD7(G^GU_#=x)q0r~l| z65`rK*f#F{PhrMlHZCk2b_ z0a7}n(6a3p$H4;jPC3UjvdjUgzO#@JS`B+j2f*N-HOsqt0>&NtK*-OXEPmGs4cLJ` z+It5~Tr9|^mIGv+gA;^Fd4tsMA^5et3Px6B(Pv*2!1BUAfWI2d_m9zFMQ4C@&MD9` zO((YxXQR6|-7rJC2fl552z}3nz&ouMQ0E)+!l?qplvGGlz!Ok3yH6b67r{yKrt7L4kM%+F4W&_U6xFyKfO0o@jx?UpZHNdpKN*_yiTpf5LFB9$xujKYcFv zK8p=&h=dmlvRgChMJ@+u^&jV0)wub*c{_HJ^>{V$pLd?U7%)ihEX*Rabp7CGR|>2P zdJJ1d{;~@;NwbI7zW}uC7;HVC2zj$Tq4@Y6ko5Gm=y%9~|AK?yL-lmtLX#>ot5J#9 zGt@$4T0g_X@?28F_@LVQac)oN9UM+oCfe6}7=@=bu&K2NP1Ih&88@bpjec9O1k&ct z?2Ad=b}*u$aF6@jvc}&GgR1Z z!ke&Kg&QbtNoM#I?y|}~Vi2-jfG8`h#od7$QG-PT_VEvblTW9i$Sn&|+gJs@Uity8 za=V2G^!o8BOHoFn?UlL3`A6vI7BwQ$?|~i-Hsj~kK+-F&GW%ro@U%k_$WT3++B#~0 z4)fNOFh?TbiYr%i8DuXgMy^*>0`XgX^;8Haa}rVMi-NE#2??hFG8&Nzv0V$ za>P1D4ZPL+ag?VIY;6_-SCd{`KySc7b}`Vb?7@8TbfPvpWmEW^1S=Hq5;@-;fIl2Z 
z?D4EZB&1M_H@rTJZS5|RYmHanX61Z5Z@m^t)zKyWheU{ z&l-H!gYC>Hnd1f!OQP_jt8Yn!Enzxk__6SeFZkr&Ur1tq5UTlFQf(<*#*$VisQzLM zFWgN)mfP=h%Qbqdoyt z;%s2q+)9>C@;BQuG6ZujF9Ao@GWyDu>qLFG4~)B6!_br&yw7xpr542~F2V|C2W!C> z&fTHfo=3ixJwkgf`Xa0R3+PV8GiH;mChWK@fNbTtzSLh2^m>y3jc2bw?v^TWVdWf{ z(RCi_+)@P33>9Sd$qgw-wNS-JW$>PqU1*VDG>$-r;PS{e_{nfQls_?W^i(9;l-PiF zr>ufq-!C9LtuOdyq6i$C^@`Lt_<@htG?;F1lyt>b(}#*T)2|FGz@3lt^JxD@W@nw) zIWc`O79~z1VzWhVaSmIp$;v0zh?wVKa>H#3%j5r@G!pjZ#mE^X~>exu%o{RB4sBb zxL8>Nl081)WYuooTpCVWCMBcQhDXS&;bt_;xQi_m-UrPKnotmJkBk%cQw3b!PECZt zrtEXnI74gZa zSx5t3UE*d#ZT#%vm<~qMWI3}PS-@54#o%{bmDXJ8zuP(8D0 zQX!1B&;X$x&!Zc6=#uTpk`Uw+fknINL$7M*cRjFO+JAtcz7GSkS3S7S0$*5#*hsY@nUi=L^D2uiR^@uA>NJ1(|ss19z zn(rXbA_;cW=b4}z{DajA<tN^gx%r;km%#y*o5 z@r89+_*vUp;#wDo8YCa@TF2qY^PbQ$d03bpNfH*PLhm<&TcxmxgAMP zRS@RhZ(N}mM+enf;HcU_>^ED69Q1TTo=BNeNQlP2mShtgzZ@sN{fBct7UQ2v_GqT) z1}u8>7xw-fhMWB@QObIGh~hX$m5Vf(7Dst_alMDR^}d`QZ@G;0>QAtGW)4X2!W@fP zruT?T=scVw(Tez;bMYryAAd>skIhcO*neRqS~as6iyzIvdVijwV>aS2w&ny%sjbDw z%39DH=WJv>@)gh6+lR&-+PV8VmeoGzfcl9(BsJYdp$(77-+mY5SapXgl)HlR(v^u& ztxrwRhfL;JoD|GEavg09Nnw3Ce)wk2Nz|`g!}QVbsqj0E=zPH(h|u{91p>h&WWpQX zW!0j093$OOQ;U`Pe3>LU@X=P&ZX)N%5o}epiiwDo*3VA*9VG0p}#Cy0vB@rjaI)J=RHnQJZYHn4m#NOk2n8BMwc03f&u>b?C;)G} zBu!#tuENyrFIf409!?nwK|-x2M6tY+V>{~6xr)bOp~WopMbjFD=Vs!PrgbQPOp-K9 zyrTA4?WWq#=HT(4^N7ZV8ob_c2B8KMDd#uN>{eEToR+jB_f_T-9qrY%pF72o`SV(E z7M#GzEj?II>^0Je48vcKV_2>e%JM1kk=tt5AxJ#~whl#+u-B!mX+tShI`RSBb)Vv= zQp?Dmm?4}xM~0T%b(gqJPei75f!OJy6S~^&PiemrCOyI{NPmhV*|o%x@Fir^uC}gZ zThwW^@xW?0X{1hS8>B!YYYe|h3&+WdL-^dlTH<1ANZAnWwY(X$uIPXsB0$Cwg0w|_?9x}z5GGktGM zGhh!dk%o)=L&?T6~_>9y2Vy{ z>yUlMLhJthb5A>GCPN# zlP?ncKvjG%RW_mzG5lQZ{^J!!Z*vGTvG;=GInS9LJU-mBY%$*Z@)H`{MnjU47+m-| z58}7ns#V+&$XqVl3AbXzkfFUk@O{1x%O9&?Y3YsZyRZM@y3_nHbS)owD&I6;9oK*s zWvP=*Oa>Lm$7NB|f1@$(9^2ilhT=mK(Q_v!+!qvW-fjFHskppF`&ODj>`EVWygnPO zr@upMf^V4VD=op>#zs-cEETf8>NzFS^QHFpwYfMv!UJ#HK9h03B!iL;i$LJ7&49Lb zFmoK2gT_!P^|WmP9HCMuZ`u+8^ zi~}O9L0eab7I)c>w(W_5EUr`1V(Jg-tr2LW3XQIE^IM40gu-*Spu&HnW|DKfi{%fa 
zR!WTX*Cdmseipf?hNFyiUPL)43VL1cV(}C9B=`3tG*?7}5a)D!=Uga}=)8>XP(PUg zmn!rt+=V$@ZNfGLkXibr19{c4L-(?K0bFX+m4LbB@bS zc{HkehVq-$jWhU;6YT?;=%o+G#n2XFYgbd3PN%jkyK2&i*BsdT7mD ztB9V_=;6Xdv2cb9GQwAWAN+8h0I8?I6(>bk^B8dq4L9OOxrw_8H56cqaOlt)&;VtsgSr+_ab0Kk279bZ|59+r`>#xpv^?tb z6(ReVn38enIAr}P1^4wIL-&qfM3+Y1BXNz(Fvq8uIyhd0KJaU>KRG{L%!$oBmy}k_ za^2##eJ{YzfV*Pa-SCAg?zlN>1KMxy3et~WGE%G_u~9fjUT^z`HXOf%x`h`L+vKC5 z?`(nOyCsQar6Zaw(k4;+#F;do8|2=+I5x5B2cDZ31)GKo%>^DOvkiSxBzt)Y6=Pq? z@JpAmOE_2J2TvC7j^BdABaJC_o*pdul!VXqm7;*jF07CxP0n=QL6?1-(TAlbilSDbIJKuY_(;T`5|{@I&bd%+R@Y;;A6N^Z;TtB@%9nY{>v31j%hOD8f_>b zz7G!l7Y3qfg79Wy2TUkLf#n@9_OM$Q+Q1%x8He~$KezwXWE2dR*2743)lBsLh#Kcl zEn#<@UIx~u(&>tW(NJHo7t4mPqyE~QL@B*nD4SFFDUZ0-u&gGZ?Wl@|j~vr6>SQ^& zGP8)=sW=WNbWg(eee03b%2H(h?;)DW8$rRZEn$DsK_pwN$DZp6C13S4;ecy0RtR{A z-ag49FH4s|(Vblozh*52h6}Q4UACZ~u>zlM)Q5wA&!d0EPl?V*D5xxmhBaD7u=eI6 zwC>MJuv&48_#15F_WEDJupH;j)jJ4~q{WVjN+X5CL#;9p#3^3x=M0MB( zNz6S*jR-s9+*N(#R%tAjdiIQ|D-(gNJ#96E(luo1!#&6}`U+b5M*?8yW$N$fCd?<; zPFhQkq2%06@S9`BD!w?1HjOE9^ZAb`!v8KEP~(cSqtlS2R=@eza7`|wziXCT2CU-o z&3IXG0tyK~12w73>GRSV=(eT;tZSD=8mgCS?b797-{W&g*Y6g7Rlfu+T@b}isqwQv zVk0pQ9Y#yNC|LOBAGjK*quN`am_)$|^a(A3w(E;A4|1tP%csNP?baaAb0zBjYf;tE zM%3RiV1904GUc#Yzq-d}JNG=~+|xhi(lxdO<*f>5W@8VM?zG)HtPK%Fz-tvli zkhGgVGqWDg8sPTT{~bb$NIU#J=?J2MsSqr3p5F6^2X>m3a7DWS0w1|TO?p0U@!1<>2bnfNEY(Z@0#sET27S=7RmGUEnmaiu9uK;dn+`n zKLRV@7<21bh*9G_hCXsU^ZX8&6R;HAd7AM4r3G=h#^RXdXzq-A6nd)cxt!;2M#evA{t%S=P=d(3;R-$E`2R`nH9PJw11SDb?tD$m~ zE$=u+{Z=xi&wMN-!^4SCy!9p1#(9)SWWvFy@fJLt8;R{7@X^Z#+~L!|VDRRpg2DAy zuom5b&)P>xiBuizm5Kn7*#$&eb_xvMhtNw44?uBEIqbc1k2R5xN9w-uaAkG}JWD=8 zKR|nN(fS%N$x1cP@A*unTBt$C%T4IKz$G@Ypn%bmdy4+dS7b~c)iIU_e}W$#Mv1|~ z5K(={{8#@>oX|#Nx1+Hj_9Gn)Yn|cdPan8Fp;F{77|X{0RDsvBdr?097fnz~=wpf; zxU0?P9CiDt8(bD@_;3ilA2dM?Y6r=kM}DBHVFjt@pCbj44>)t%V=6aS3?8KO!Fc34 zXpOXmEky?@J*&V%Wv1!=K{D&nno6*L8BvE7w(nXTa&tdv+iX1-2< z)HY|>{*wi{lgcc_0A@-RcYxJy*oA(*B&S4lP zX90}I4p3K4>2i!*8mjkg25XU3a6Ho+F2yL3sVE=zhHofp`lAD9792oCwF{{li6L5y 
zg844{;M4CGYQvo)^r;nU>`aX_@Wf9Xg_R1y>nq}9NxB~#5BZ3Kmqx&*Q#nL?(jR2b zX5)2ct{`zclw&eJM;Ql}gUqlyH2&9uW_s(w^Y@G4N^B2WAWpMsGh(?+q7IKca@|2F;14mULV|Os@0-6 zj}Gmxr|5Z3Fy5dBRsYgp-Q!{OshG>!W1pe?A$u?uN<;BIA8`B25YW8l4RUT<3Dvs? zZ{&7%)=a2!{0CJym}&?Ywo8EFPbE59d?q6Px+G-#40zFB2Iey}(Apg?aK<7NW%otl zkNeuthmF4}S!M>*ACH3m+tI{rb|DN$uHp6uQX$oOKcL^8$hS)z-btmiZ)eGqht)^O zuYoW)e0?u;Hu|txqc6!tT}L`4IG9vy7GynTZ^A-jj>AVq;%PFq=v7e+_>PMMofUyU ztr#IZj;$oYF~vUguL7n1WN0uv0>ba#01;RXP1BuW^;A50#51Cavl~kJX+sV)r@+eZ zL!fS-io#at((?CB;L=hp+HL_tZx87~+|fm}_vkM8{%-}hcl#MFQ&WvstxyvA8&Vn_Y1K$%@LhSSy z@YIVTR24=8yTg%h&^46pybzRMufvGT%LH|L(SWWi7)m`t-Oe%a^kD&G`CuVQHw%R@ z$C>7NR|Mc96G>vUj*!vS{B-YIB`l;Xgqoh;AUXeZiQJ_O;_Kke#3jT+jc^{UKOzfB z@js9z_ggJn&*i9Qr(slz`wS+z@KJ6(q}X=BzX?0o`*AVwrPD;PDFBjrCg5LW4bIMa zFhfF{gzsyp?GrwPW54}F|1+@ z;nms#l)rokuAQAkM#uV@eP2$JXqRPVp;sW0RC|gS78IcY!QHr_Lz;+rPrx$u=XgTB z6QldaWQogGwm@z=?Rl~uUyIQt4K_pgvsn;9Sc2_)znsWRFCkTivT(cT57{Jfig+q< zojc(z%)2#1%#8i}Z~@hV|K{>)5AGGk^M0LR(_crzt*iw6b=MNq+9*l7b5&XUb)h(5 z6+byT7*4hoegucP3HU;(6SICzBTn$8uys!c?D%C@Yjkiqx$NGBrBlOP}8D=QpuqJaf_Lq~+ryB5IYx~JGXzLFg^;o+l7KKRf1F7^R01`?NPz>ClZ zII}jCHkM6>{V%UTSVunC;%ks{TNUWt6@Wj~g55C(=-9jn)Wx2`_cPx?@z{A-+CK+G zx);#lZ82O<>IrVAd9Z#gk-kC&Gd+i&L6FG>XcblgW$7@W>nFkf=Q+^x;IcLAncS2i zljL&ur%><;2>%xf-}lJil#W{=IKP*18D3z7Qd0HjhDg@HY5}CDPYNEMc*^2att15RvB(+>Q{m zO_aik@OWx_OA?6{l%_S7T7$;KT_)7Zf7RRXgMu; z&6suEa0n8rwQ1hsdidCsMbzXOG-B!v*%sH}bmS(M@0woqS(8m57o0;3UvM43fM2yw z8ZFVHT4VZ7YBX{>c?x$4{y>I)3vturUg92}g(A{EQ5Req@d5w0sC0t~s83&tpDepa zLB)Giwpg9E7t6%<2cCiF>}habc#5npoMQBxa~X$Mf{^)25Hb2!DHDq)B-MfQm-0v9 zI^#2Bljt+ZJ+Fb*TWX=d*8(VAjf2?edJ|4fqt|;8 z-G38L8ThoBpPR}@;#Xzh&6og$ehI^KCl909($|sXz6v~S?TIHGuaXL%672eFHCiGz zigjYI)SjETit}cM!=v^VMD5wb{nvX!RB#M))u@Ox_Rqp1bPw~2>mIb`gu>}r7Qojc z4PKn{)yG~Q*?R@n*tI1vlBOj%bJaL((GN%2MzXN}+I4()$9(4bD43`7JEGUNKiNxX zhp8m%Pnavc(oMRy{3oIU0+psrWRyDl@udt3;*^pNVHFH@aRAnYwo$9oi}9{T zWm1TG31#66uJhr31Qz2x@(IDcn;wU6@T-NOu<`b&lgoqYT;a`pvQO(z;tHxfT zncR+;TF7itvtcdKpC(Lq`@g{^rWDz4e4OO#htjDt>ahE89mh}1!`m#oapIcUyk2u3 
zqJPVfWa`F{(vH2P?b=R~CmjS2*JqQ32j0QdGD$RQhDgrVOxBJPfrA6WJllI+gwL}Z zJS_KN+vF&iAK*xCbo~W~KLhxiV;He5Od+cZzrkgV4dm|rG=l8X$hD~u;!kN1)5FoE zQn?(867+R$iU?mm!)J_vD@1h81 zJNgisE{hA^XA!xDVT?z>U-V)nAI)D@h=X$y>3_ZZu=<;s$b2D<#(h#zy%FbBYq&sN z*kM4i55rh#trJK*vIWo5=_27alPIBUDOy@1Yvw)1%?VasN4Mu+Ks~4LV~t)O`k~xS zeJ;v@M;n*&l+N78YSjbSZ01(v;WtVyfFK+%RD_@DznN2ss;p_M1Z;cyiV?0h#XPeZ zWIt5Ns=Qr>1PxA7GA&-zH?3T1k$Nc;K<^^6x0$kb(sryazddtQC>%L$FrlLtNI`>- z1);r^sKCF%SnNzHD?ZN=ok5SlVZIjO8&QDfRW2yVM+r(>&Jn+hQY@d08c1(?ghR%R zu%gnH+RrJw(NiyT;-s(=A?HM7e4n7K096)F=hF)kIgi_f5`59_WgocP;hep}JZ15x zSoNg>@QryusD1VJ7xa{?R*u1bN5L>{<$;kf8IPs zeuEocuze$n%Kb#y;>Y;yqIPtkaue>#5`*H1G6|#{ahZe`}Gj_Ye_(&qd5N7Cql+Qj*}CYJV8J4J~q$_ zM%ERJiJ6cw@4f#zy1XfZ_F(eK-Bk-wi9E7S*AI>u#vsKi~XJog|ca&qk(C z<>*D7HK@|HA33$n2kXRxsLtX#CyiP}S9?d83k+4@y3h8=+4wo^T=R#$JkN^#5~qq@ zUdmv1Ik%vQ+m7t>ydu0jdMl~*io}Db60o{_36a(iB@>qO$#yR*^1!N|ZMAYBj@_0_ zzw0hMw|oH!+;)*3ealB44mRS7VhU$$S0Eb42F;_qqz!f-mv2>hR53E^Tus(B6|+CKv=b-aRixBx7fWSdfJ&)7JcE$$Nc!bmj`y+y1O|b8 zD|m|q<}V-@od1&K-i`Fidk!RUbC8*CWi%yfB2H9#w5cyC@1XI86gju+7$dsefE8-G zghmeQ(y5N8NLG#~i6X1%mTgxl6feo<^(vAJ@4Z>&>N~^SklxKPT|~*2ARd`)yRD zHw!8C&?w!v40$bFM-(jOV1fNF5aTnZH60J(3ySx!!}HZh<(LV!@{pl;=0(hxioIN3 zAj9rj6$+gprtm;E5!EjqMxDYj)YKITMkU)AE&Dc#)$g_ARKG0p$n_v7TBl$MFFUeW ziaW}GGDNmTUtc%J9=25`b=EB@NK(16pnpI!50D{ z!8H^7HP=I+)*WP~GM(#{a6XCAZ~`9F0KO-)TNSo|`L0iBFe(r(&+-NRJ`ae!Do&fI zOCqOWUuah@MIY}^BaPdSLz8kSs1D~a2Znm_n@8p7PCzhvED;Eov{S%#PBf~hpH3&q zd_lH*B4F-F1gw>qM)KPmP{kC3mb@|qGq))4X!YXw(|ggzItuL9ow4kA+bZ_I+x?_dVlNu~DMm+}m_`n+GG&JplR#E23StWzQFmGv z$0*-T*Rk@<;>>xqRfFz~>(48!w!S4v1zi-O$IT%0+8CE4U=O~X!l(CDG1f-Y$@OLf zGWREUO)y0)t0q7c<$Lfe*Q3O=@YHA-`(E9d1v zidY0oyr+;#(z)86*;821Opuj#<@Sgdc4Cp~W4Pq;2tK*il>J_vO~w!DlY`w4u;v>9 zQhz~@gs3*-KPmG&$@s?y3$ml> z0UG2FMs*&dVDv%`{oYlHiVU2Xj>nlO|CK4;;Fm)Be_lh>zaYjpyA(}pHniA%UJHSO z5Av*9jTO9NnA^2~DSLtSSYs#%506+^izLh>$=ifUS!X4hx+DZT^XI|N^P!jyv4(q5 zADJD~k0M7wZJ03XVUv!E@B~+tQ1A9z!Q$@}?*DrUg+=G08|CwGudp?oUtq&hXJx?V z$s}47pTIDDi{aje+pLY(J_vi$i54_uBdyGL=1k#yylvJeD$w;LX?k%9zMq^zJ*`)% 
zdWj3w@0IcMIVr*?IG&ooy&-$?=q=6pQzts&Cr=qgfDK5VtdXz^TJjFJejS8 zeQlhv{#_$_L~#wMJl|N`VJv~OE=Djed5Y-NiL1Ey{xcN(NS#O~oMwHsT=B$GO|oms z8m&5>&SkIu^?04buIahUnY;^_7iE)_|qPx`}L#3OO;gHxE}K@?+-b_ zpNJlmar@pPSunyA!oNP2Vx`AhYGYU2LS}DhviVdG?o0ZN#kr^CjRP`p%IXaYm6=Vw zkK6-yVw&;gg^g&Rq#q!Wghz!v(d`? z$-cwYghO;&FK>Kc)xmtM6mme)mA; z_fB%THXjBaWa7rRwd`;J$FeEZp`|qxQQQFh_JI=lzTV(d}F>|GFIQlN%1J^4GxDpPZvN z_XX6p-UYjCu7BQ7KL z!*vR$q5Ve$bY2XEfJ6OfbW*JuPAJvD zDeb$=r6GS>pPT*sRJ9QQgzr}p$&a2%BJ;kJ{O8|F&-t7Ow_0k*q46^$sXd8=-SHzihfdJbIxdir zM-Pa`+Gs|9^f82pJ>d4iLdo0OW03x<49z>8NKRk#<92XX6O*oIWQR>JVG6Fp3XOZD zO2P(oWXp+n!)-{MSxLGjHj|R`7s=Tv6;`rs8p%D6a5xzVze4h&%V9wq%PyM$!-~^Gm0v zPuwJHxkuPv%PzP~tFp7?2FQbAD_&{qN%B$ZIBV!LguGHT={pO(Ny-g1R=YU?LFz6- zk8VKSDn{5w)s(F5osBntS;7{qxrycWI@RVx{KTbOQn9p94VnB;jhs+>g~jySD5Vd2 zr1YTynciPTpXjK=JOTDGT*eLeSvdGp95o;yp9c+Bu* zCT-)W^lh#9ow6i3`GKF$9<=b}Bd?QD@_AwP4?cOc;C|iyiOHL4%hiiBww_ z9T6SGPTWcYn@efn6`v0LFSJR+@jdY5!Fk+SA4t4LIEIQxA#2|72~F(UNen(u;4(u| z(!AS|yw$KGKHqk+#SPPNvxy;-rE!Gq^`B&6dMh3)KTqB*e#zX`4`yHN5+!=xoTtj@ z7WG-|0+C6H#HSKh;`@9qsL62-sSPa0=Vjw?--=LFk`scv&L$J>uv(nk@&VWUu7kwa zVWi@A4vCj7AU+%us(NuMd9T1?*sw%I5D-lx*svF{fr#AhJFK7rMvdL5p#CH=Vl1GN zU-&&BCKv(p$M2tew{K==cILY~pI3I~#c0yIN%>^u@t;Uo(M59Oxfk6yH=2svZj+I* zOXznm#5AGi1O$bAPla`TX^hu0S}-q(f7hf<-y|8)kcV6N+!O)z5zVHyO9H9)=|b|P zmjz!RygZfO6g@_h0*2Ay)lKMexG&9b z^rXeVI?@H&Db&CJM&f@UgQ}-{Qqibf8uCsBPx>kcp8m0xMwG3G>AknpT%$vLO@F?S zg7;XsIZcI|Tt6Hf_`Hur%_~u8Rv|>V4+A)nL#s{=r1k?m>HbwZU@L4wSykVY1Fz4K zoPIV`5&4V^OK_v=YbFD7GN&^qvecf>nNa>*OYheng>))|ZZ2gcp+1t1uT_Or->Tv- zssicTeU9V+pNwr$B&T_MD(G#K0bEV5WGT7A5)A9x7G5lvA#(hJ(EhK;;h? 
zsK&b*^h)jmCdWgePn0M z$xxm>2`Vp4gM>JDv`wAg)Az$vV)GP(tjv-#*}WMtYR&MZN)PN^|DXbh+7S-F$Qfk4yb?Leb>ZImP?(XjnR_POk0$O>rCH71bZPu0 z6wol1)12oIp85L9RJBo9t)!ZiP4woroEV7BJdC&}&-srR@cUr)I!uKzkqk;sGs78w z*un+*A}Btv3~Vlqg~#8dit9(|DpID}qcszPK$MzPs=VZlc9Ts9uw z=v2{HX#q%M$3l)N&K9q){fe6=)TVl)zZ|U8Dx`i@s`Sf2c_=zGi}YO>LcY$ZP&gQ@ zM&@N>kXq{Y(ntOITpY`-sD6PNowzg;>7=u?*j|U0N|Rx?p*nqO@Emok)9ywf&UUB=$*!pA$Qw<=q_FO@SfAJz}&wGj8mPC@7 zH}}z}mVvOrYAu@HGn{6*EJUO48F6WIOrfv*3aOlX8?>xf5;buH(e{`}@4;M{Gk_8M z7uA#dI*I7g$xOv4elJ^8<~)cFa~EIRTSuDa`;+Encf<|dYtZ6k7Wcdpsqmh8mpD0P z^0_)1oIs?@EgOAZ@nSvy48n2(=hXW#8vUvt9o(=R^|<*tm-R3fp-YaW+s_Zl@~zjz z4?SihmklgfG})3O{+lCfH}G<7`1(qA2y(Z3(+%P*Gqsze)eHZ-3Gy+Pn}>L5|g zI8B{r96^(vzJq-^KcK!#|y?zPxrdma6pn`X?{Mp1-$6y*PnALTo2CgoEgNNoI~&M@1y3<=uUe__>-QmW4M}e zqey;k5ejn1Ahz9DOoLe*G1L-gx+B`N(g3?I6XN+L##qh>@w!Z7Sdwjz1-A@w+I@3e zu}g?6hFIe5=IPYxyb1Q3Xn{r1hTQsXrdar@iBvbT_)f!GJZ4rJXL;8cTL&57b$O=v z;uK@NXU2DQ+f*U$f6x@;7nz8sKA!*DYUD`f!B_zUq9Eoc1xp2SUxU+6)tXX0|I8-Lk-59=eK~)qGu_ zTr<3?cL;gqV2yt|WQfIfhB#<}HP$_9jw`P;kwN3ssonX-)Nr;I#lB|P*l#mkU9c5y zo#+qDNg=Gfv6E(n9TA_Ywd6kkK8I``mr9orYaDo&#VdXj;>$~lQQeMQ`i{@0(#NJ$A&%c6a12;j|vMNX&vRdijdlicRn+ywSIP9Fj*ZSL? 
zz!^U%2BRChz}>zaB7%#+_|`#?=AGm&85F@Gqi}E?8ldDK`NQ@I6QwR6|F14lEh~6C zf=h@hfee#9ppe9G^yHzmcq733w-_g<^H6eB1 zJ$NL|gt2i4pE-UmzQx%wpV2)oE#m7Irrm5*R&TriMcI?5FO zyIeWm$4iUzr-9zlXt3NIsI+yILdm>T5UK*5=W~N*|z)cAfBV}%uI#=_%lhQ(4U72_HIcK}6nQp263Vf0a!H+zE89r{x z$SQT^Ykprb^EGRmDkjB-a1qMY@SoFz?^$igMFV;Eh5fMsMcQSlP_+(f=#G{Xoq+3q|;yfl^( z2pCo)B1R(PHEVUQS^GoHI-P0OZLhgSlU_Sb`hRIM=v))~p(ex5G#Rzmq{65$mP|L^ zE>$=Fz_6a&K81m0R~T4*j6tXKTT5Ww9)o{3*~8e0ANfhQ)lcE>U19K^*p;_J z*^rN9x1r0+?i7VKQde@@P~g`^%PQg5g?lS1?c1#=*tA2z_ERX>b%jFDk5TA!?Q03_ z+oSOB0zu0xQf;$Gt;L>}NYG}bL)+QU{yy7Ad;janXbO6<-Q)>!Ni6@@k?j_lFgIS# zhgqlgAxKBy+&%Y@H}51%z(k{Hzztowc_#kIYZAa>M8__mG2 z-1D!AzMzn|Jf3W)|^Z)<= literal 0 HcmV?d00001 diff --git a/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index b/llvm/unittests/Analysis/Inputs/ir2native_x86_64_model/variables/variables.index new file mode 100644 index 0000000000000000000000000000000000000000..c20d8afabf383430e807fde58270f4ff7c92fdc5 GIT binary patch literal 398 zcmbQn@*$OhfsKPviiv}fK}I0n*~8V@JHX%5FT~Nw$2H#H$;;I_B;Gy9F~Ea^U5G(p z%3@ZHEBvOMEJwl_O!%`?ixP|D6ALo+b5l!-GLwt-4fK=qOY=(f^&CS&f;^oGtXXkj%{$(r+6K}d<; zP(QgZje%hTs|n91pMdA{fb1StqZ*47qNWTcA~}hbsYUVSnI#$V<*AwJ870NK2Kq^v ziN$4W7!p{7m^e}t9?W6Yn80lMt3U99sG=CqrFp4UPZ%ONg_t?6F@jWR09BZFzuEp# zSW(z;RuKaSqe27g9*HtPHz83)iM~Pxph%7cP$i?nhxx2)b@qRrx`qJ=7(uw8ao InputsDir = unittest::getInputFileDirectory(TestMainArgv0); + llvm::sys::path::append(InputsDir, "ir2native_x86_64_model"); + return std::string(InputsDir); +} + +// Test observable behavior when no model is provided. +TEST(TFUtilsTest, NoModel) { + TFModelEvaluator Evaluator("", {}, {}); + EXPECT_FALSE(Evaluator.isValid()); +} + +// Test we can correctly load a savedmodel and evaluate it. +TEST(TFUtilsTest, LoadAndExecuteTest) { + // We use the ir2native model for test. 
We know it has one feature of + // dimension (1, 214) + std::vector InputNames{"serving_default_input_1"}; + std::vector OutputName{"StatefulPartitionedCall"}; + const static int64_t KnownSize = 214; + + TFModelEvaluator Evaluator(getModelPath(), InputNames, OutputName); + static const std::vector Dim{1, KnownSize}; + + EXPECT_TRUE(Evaluator.isValid()); + Evaluator.initInput(0, TF_INT32, Dim); + + int32_t *V = static_cast(TF_TensorData(Evaluator.getInput()[0])); + // Fill it up with 1's, we know the output. + for (auto I = 0; I < KnownSize; ++I) { + V[I] = 1; + } + { + auto ER = Evaluator.evaluate(); + EXPECT_TRUE(ER.hasValue()); + float Ret = *ER->getTensorValue(0); + EXPECT_EQ(static_cast(Ret), 80); + } + // The input vector should be unchanged + for (auto I = 0; I < KnownSize; ++I) { + EXPECT_EQ(V[I], 1); + } + // Zero-out the unused position '0' of the instruction histogram, which is + // after the first 9 calculated values. Should the the same result. + V[9] = 0; + { + auto ER = Evaluator.evaluate(); + EXPECT_TRUE(ER.hasValue()); + float Ret = *ER->getTensorValue(0); + EXPECT_EQ(static_cast(Ret), 80); + } +} + +// Test incorrect input setup +TEST(TFUtilsTest, EvalError) { + // We use the ir2native model for test. We know it has one feature of + // dimension (1, 214) + std::vector InputNames{"serving_default_input_1"}; + std::vector OutputName{"StatefulPartitionedCall"}; + const static int64_t KnownSize = 213; + + TFModelEvaluator Evaluator(getModelPath(), InputNames, OutputName); + static const std::vector Dim{1, KnownSize}; + + EXPECT_TRUE(Evaluator.isValid()); + Evaluator.initInput(0, TF_INT32, Dim); + + int32_t *V = static_cast(TF_TensorData(Evaluator.getInput()[0])); + // Fill it up with 1's, we know the output. 
+ for (auto I = 0; I < KnownSize; ++I) { + V[I] = 1; + } + auto ER = Evaluator.evaluate(); + EXPECT_FALSE(ER.hasValue()); + EXPECT_FALSE(Evaluator.isValid()); +} From 73f02a61dfb967e3d058490b0c39178e1219835e Mon Sep 17 00:00:00 2001 From: Mircea Trofin Date: Mon, 13 Jul 2020 16:29:19 -0700 Subject: [PATCH 174/771] [llvm][NFC] ML InlineAdvisor: Factored CHECKs in common test The CHECKs are going to be shared with the development mode test --- llvm/test/Transforms/Inline/ML/Inputs/test-module.ll | 6 +++++- llvm/test/Transforms/Inline/ML/ml-test-release-mode.ll | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/llvm/test/Transforms/Inline/ML/Inputs/test-module.ll b/llvm/test/Transforms/Inline/ML/Inputs/test-module.ll index b8279e5db6a02..d01f4bb301a86 100644 --- a/llvm/test/Transforms/Inline/ML/Inputs/test-module.ll +++ b/llvm/test/Transforms/Inline/ML/Inputs/test-module.ll @@ -61,4 +61,8 @@ define i32 @switcher(i32) { ;

*internal, + std::size_t internalLength, void ** /*scratchArea*/, + std::size_t /*scratchBytes*/, const char *sourceFile, int sourceLine) { + Terminator oom{sourceFile, sourceLine}; + return &New>{oom}( + internal, internalLength, sourceFile, sourceLine) + .release() + ->ioStatementState(); +} + +Cookie IONAME(BeginInternalListOutput)(char *internal, + std::size_t internalLength, void **scratchArea, std::size_t scratchBytes, + const char *sourceFile, int sourceLine) { + return BeginInternalListIO(internal, internalLength, + scratchArea, scratchBytes, sourceFile, sourceLine); +} + +Cookie IONAME(BeginInternalListInput)(const char *internal, + std::size_t internalLength, void **scratchArea, std::size_t scratchBytes, + const char *sourceFile, int sourceLine) { + return BeginInternalListIO(internal, internalLength, + scratchArea, scratchBytes, sourceFile, sourceLine); +} + template Cookie BeginInternalFormattedIO( std::conditional_t *internal, @@ -90,7 +116,6 @@ Cookie IONAME(BeginInternalFormattedOutput)(char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { - Terminator oom{sourceFile, sourceLine}; return BeginInternalFormattedIO(internal, internalLength, format, formatLength, scratchArea, scratchBytes, sourceFile, sourceLine); } @@ -99,7 +124,6 @@ Cookie IONAME(BeginInternalFormattedInput)(const char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { - Terminator oom{sourceFile, sourceLine}; return BeginInternalFormattedIO(internal, internalLength, format, formatLength, scratchArea, scratchBytes, sourceFile, sourceLine); } From 66b66988e613a2349d06600e12601ecbe8032256 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 17 Jul 2020 22:28:36 +0300 Subject: [PATCH 670/771] [NFC][InstCombine] Add some tests with 
sdiv-by-negative-power-of-two --- ...f-non-negative-by-negative-power-of-two.ll | 47 +++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/sdiv-of-non-negative-by-negative-power-of-two.ll diff --git a/llvm/test/Transforms/InstCombine/sdiv-of-non-negative-by-negative-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-of-non-negative-by-negative-power-of-two.ll new file mode 100644 index 0000000000000..c10a98a41acba --- /dev/null +++ b/llvm/test/Transforms/InstCombine/sdiv-of-non-negative-by-negative-power-of-two.ll @@ -0,0 +1,47 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt %s -instcombine -S | FileCheck %s + +; Fold +; x s/ (-1 << y) +; to +; -(x >> y) +; iff x is known non-negative. + +declare void @llvm.assume(i1) + +define i8 @t0(i8 %x, i8 %y) { +; CHECK-LABEL: @t0( +; CHECK-NEXT: [[X_IS_NONNEGATIVE:%.*]] = icmp sgt i8 [[X:%.*]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[X_IS_NONNEGATIVE]]) +; CHECK-NEXT: [[DIV:%.*]] = sdiv i8 [[X]], -32 +; CHECK-NEXT: ret i8 [[DIV]] +; + %x_is_nonnegative = icmp sge i8 %x, 0 + call void @llvm.assume(i1 %x_is_nonnegative) + %div = sdiv i8 %x, -32 + ret i8 %div +} +define i8 @n1(i8 %x, i8 %y) { +; CHECK-LABEL: @n1( +; CHECK-NEXT: [[X_IS_NONNEGATIVE:%.*]] = icmp sgt i8 [[X:%.*]], -2 +; CHECK-NEXT: call void @llvm.assume(i1 [[X_IS_NONNEGATIVE]]) +; CHECK-NEXT: [[DIV:%.*]] = sdiv i8 [[X]], -32 +; CHECK-NEXT: ret i8 [[DIV]] +; + %x_is_nonnegative = icmp sge i8 %x, -1 ; could be negative + call void @llvm.assume(i1 %x_is_nonnegative) + %div = sdiv i8 %x, -32 + ret i8 %div +} +define i8 @n2(i8 %x, i8 %y) { +; CHECK-LABEL: @n2( +; CHECK-NEXT: [[X_IS_NONNEGATIVE:%.*]] = icmp sgt i8 [[X:%.*]], -1 +; CHECK-NEXT: call void @llvm.assume(i1 [[X_IS_NONNEGATIVE]]) +; CHECK-NEXT: [[DIV:%.*]] = sdiv i8 [[X]], -31 +; CHECK-NEXT: ret i8 [[DIV]] +; + %x_is_nonnegative = icmp sge i8 %x, 0 + call void @llvm.assume(i1 %x_is_nonnegative) + %div = sdiv 
i8 %x, -31 ; not a negative power of two + ret i8 %div +} From 0fdcca07ad2c0bdc2cdd40ba638109926f4f513b Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Fri, 17 Jul 2020 22:35:21 +0300 Subject: [PATCH 671/771] [InstCombine] Fold X sdiv (-1 << C) -> -(X u>> Y) iff X is non-negative This is the one i'm seeing as missed optimization, although there are likely other possibilities, as usual. There are 4 variants of a general sdiv->udiv fold: https://rise4fun.com/Alive/VS6 Name: v0 Pre: C0 >= 0 && C1 >= 0 %r = sdiv i8 C0, C1 => %r = udiv i8 C0, C1 Name: v1 Pre: C0 <= 0 && C1 >= 0 %r = sdiv i8 C0, C1 => %t0 = udiv i8 -C0, C1 %r = sub i8 0, %t0 Name: v2 Pre: C0 >= 0 && C1 <= 0 %r = sdiv i8 C0, C1 => %t0 = udiv i8 C0, -C1 %r = sub i8 0, %t0 Name: v3 Pre: C0 <= 0 && C1 <= 0 %r = sdiv i8 C0, C1 => %r = udiv i8 -C0, -C1 If we really don't like sdiv (more than udiv that is), and are okay with increasing instruction count (2 new negations), and we ensure that we don't undo the fold, then we could just implement these.. 
--- llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 7 +++++++ .../sdiv-of-non-negative-by-negative-power-of-two.ll | 3 ++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp index c6233a68847dd..f039989c004ce 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp @@ -1182,6 +1182,13 @@ Instruction *InstCombiner::visitSDiv(BinaryOperator &I) { return BO; } + if (match(Op1, m_NegatedPower2())) { + // X sdiv (-(1 << C)) -> -(X sdiv (1 << C)) -> + // -> -(X udiv (1 << C)) -> -(X u>> C) + return BinaryOperator::CreateNeg(Builder.Insert(foldUDivPow2Cst( + Op0, ConstantExpr::getNeg(cast(Op1)), I, *this))); + } + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/ true, 0, &I)) { // X sdiv (1 << Y) -> X udiv (1 << Y) ( -> X u>> Y) // Safe because the only negative value (1 << Y) can take on is diff --git a/llvm/test/Transforms/InstCombine/sdiv-of-non-negative-by-negative-power-of-two.ll b/llvm/test/Transforms/InstCombine/sdiv-of-non-negative-by-negative-power-of-two.ll index c10a98a41acba..f9dd32bfc612d 100644 --- a/llvm/test/Transforms/InstCombine/sdiv-of-non-negative-by-negative-power-of-two.ll +++ b/llvm/test/Transforms/InstCombine/sdiv-of-non-negative-by-negative-power-of-two.ll @@ -13,7 +13,8 @@ define i8 @t0(i8 %x, i8 %y) { ; CHECK-LABEL: @t0( ; CHECK-NEXT: [[X_IS_NONNEGATIVE:%.*]] = icmp sgt i8 [[X:%.*]], -1 ; CHECK-NEXT: call void @llvm.assume(i1 [[X_IS_NONNEGATIVE]]) -; CHECK-NEXT: [[DIV:%.*]] = sdiv i8 [[X]], -32 +; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 [[X]], 5 +; CHECK-NEXT: [[DIV:%.*]] = sub nsw i8 0, [[TMP1]] ; CHECK-NEXT: ret i8 [[DIV]] ; %x_is_nonnegative = icmp sge i8 %x, 0 From d8e0baf29daa58233c349d94ca4617065efc4c95 Mon Sep 17 00:00:00 2001 From: Xinan Jiang Date: Fri, 17 Jul 2020 20:57:08 +0100 Subject: [PATCH 672/771] [InstCombine] Fix typo in comment. 
Reviewers: fhahn Reviewed By: fhahn Differential Revision: https://reviews.llvm.org/D83951 --- .../shift-amount-reassociation-with-truncation-shl.ll | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll index 7ae9b76fa5fee..0808b9a3763ef 100644 --- a/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll +++ b/llvm/test/Transforms/InstCombine/shift-amount-reassociation-with-truncation-shl.ll @@ -194,7 +194,7 @@ define i16 @n11(i32 %x, i16 %y) { ret i16 %t5 } -; Bit width mismatch of shit amount +; Bit width mismatch of shift amount @Y32 = global i32 42 @Y16 = global i16 42 From 502f0cc0e3889229e923e187f38dda91324ae139 Mon Sep 17 00:00:00 2001 From: Mitch Phillips <31459023+hctim@users.noreply.github.com> Date: Fri, 17 Jul 2020 12:59:09 -0700 Subject: [PATCH 673/771] [GWP-ASan] Split the unwinder into segv/non-segv. Summary: Splits the unwinder into a non-segv (for allocation/deallocation traces) and a segv unwinder. This ensures that implementations can select an accurate, slower unwinder in the segv handler (if they choose to use the GWP-ASan provided one). This is important as fast frame-pointer unwinders (like the sanitizer unwinder) don't like unwinding through signal handlers. 
Reviewers: morehouse, cryptoad Reviewed By: morehouse, cryptoad Subscribers: cryptoad, mgorny, eugenis, pcc, #sanitizers Tags: #sanitizers Differential Revision: https://reviews.llvm.org/D83994 --- .../optional/backtrace_linux_libc.cpp | 12 ++++++ .../optional/backtrace_sanitizer_common.cpp | 37 +++++++++++++++---- .../lib/gwp_asan/optional/segv_handler.h | 15 ++++++-- .../gwp_asan/optional/segv_handler_posix.cpp | 17 +++++---- compiler-rt/lib/gwp_asan/tests/CMakeLists.txt | 3 +- compiler-rt/lib/gwp_asan/tests/harness.h | 3 +- compiler-rt/lib/scudo/scudo_allocator.cpp | 4 +- compiler-rt/lib/scudo/standalone/combined.h | 2 +- compiler-rt/test/gwp_asan/backtrace.c | 29 +++++++++++++++ 9 files changed, 99 insertions(+), 23 deletions(-) create mode 100644 compiler-rt/test/gwp_asan/backtrace.c diff --git a/compiler-rt/lib/gwp_asan/optional/backtrace_linux_libc.cpp b/compiler-rt/lib/gwp_asan/optional/backtrace_linux_libc.cpp index bb0aad224a14c..92eb293dab499 100644 --- a/compiler-rt/lib/gwp_asan/optional/backtrace_linux_libc.cpp +++ b/compiler-rt/lib/gwp_asan/optional/backtrace_linux_libc.cpp @@ -23,6 +23,14 @@ size_t Backtrace(uintptr_t *TraceBuffer, size_t Size) { return backtrace(reinterpret_cast(TraceBuffer), Size); } +// We don't need any custom handling for the Segv backtrace - the libc unwinder +// has no problems with unwinding through a signal handler. Force inlining here +// to avoid the additional frame. 
+GWP_ASAN_ALWAYS_INLINE size_t SegvBacktrace(uintptr_t *TraceBuffer, size_t Size, + void * /*Context*/) { + return Backtrace(TraceBuffer, Size); +} + static void PrintBacktrace(uintptr_t *Trace, size_t TraceLength, gwp_asan::crash_handler::Printf_t Printf) { if (TraceLength == 0) { @@ -53,4 +61,8 @@ crash_handler::PrintBacktrace_t getPrintBacktraceFunction() { return PrintBacktrace; } } // namespace options + +namespace crash_handler { +SegvBacktrace_t getSegvBacktraceFunction() { return SegvBacktrace; } +} // namespace crash_handler } // namespace gwp_asan diff --git a/compiler-rt/lib/gwp_asan/optional/backtrace_sanitizer_common.cpp b/compiler-rt/lib/gwp_asan/optional/backtrace_sanitizer_common.cpp index 3ac4b52bfc271..a8083e4e64cb3 100644 --- a/compiler-rt/lib/gwp_asan/optional/backtrace_sanitizer_common.cpp +++ b/compiler-rt/lib/gwp_asan/optional/backtrace_sanitizer_common.cpp @@ -22,28 +22,45 @@ void __sanitizer::BufferedStackTrace::UnwindImpl(uptr pc, uptr bp, void *context, bool request_fast, u32 max_depth) { - if (!StackTrace::WillUseFastUnwind(request_fast)) { - return Unwind(max_depth, pc, bp, context, 0, 0, request_fast); - } - Unwind(max_depth, pc, 0, context, 0, 0, false); + if (!StackTrace::WillUseFastUnwind(request_fast)) + return Unwind(max_depth, pc, 0, context, 0, 0, false); + + uptr top = 0; + uptr bottom = 0; + GetThreadStackTopAndBottom(/*at_initialization*/ false, &top, &bottom); + + return Unwind(max_depth, pc, bp, context, top, bottom, request_fast); } namespace { -size_t Backtrace(uintptr_t *TraceBuffer, size_t Size) { +size_t BacktraceCommon(uintptr_t *TraceBuffer, size_t Size, void *Context) { + // Use the slow sanitizer unwinder in the segv handler. Fast frame pointer + // unwinders can end up dropping frames because the kernel sigreturn() frame's + // return address is the return address at time of fault. This has the result + // of never actually capturing the PC where the signal was raised. 
+ bool UseFastUnwind = (Context == nullptr); + __sanitizer::BufferedStackTrace Trace; Trace.Reset(); if (Size > __sanitizer::kStackTraceMax) Size = __sanitizer::kStackTraceMax; Trace.Unwind((__sanitizer::uptr)__builtin_return_address(0), - (__sanitizer::uptr)__builtin_frame_address(0), - /* ucontext */ nullptr, - /* fast unwind */ true, Size - 1); + (__sanitizer::uptr)__builtin_frame_address(0), Context, + UseFastUnwind, Size - 1); memcpy(TraceBuffer, Trace.trace, Trace.size * sizeof(uintptr_t)); return Trace.size; } +size_t Backtrace(uintptr_t *TraceBuffer, size_t Size) { + return BacktraceCommon(TraceBuffer, Size, nullptr); +} + +size_t SegvBacktrace(uintptr_t *TraceBuffer, size_t Size, void *Context) { + return BacktraceCommon(TraceBuffer, Size, Context); +} + static void PrintBacktrace(uintptr_t *Trace, size_t TraceLength, gwp_asan::crash_handler::Printf_t Printf) { __sanitizer::StackTrace StackTrace; @@ -77,4 +94,8 @@ crash_handler::PrintBacktrace_t getPrintBacktraceFunction() { return PrintBacktrace; } } // namespace options + +namespace crash_handler { +SegvBacktrace_t getSegvBacktraceFunction() { return SegvBacktrace; } +} // namespace crash_handler } // namespace gwp_asan diff --git a/compiler-rt/lib/gwp_asan/optional/segv_handler.h b/compiler-rt/lib/gwp_asan/optional/segv_handler.h index 10af15055e2a8..0fed4f2e012e9 100644 --- a/compiler-rt/lib/gwp_asan/optional/segv_handler.h +++ b/compiler-rt/lib/gwp_asan/optional/segv_handler.h @@ -59,6 +59,15 @@ typedef void (*PrintBacktrace_t)(uintptr_t *TraceBuffer, size_t TraceLength, // without any symbolization. PrintBacktrace_t getBasicPrintBacktraceFunction(); +// Returns a function pointer to a backtrace function that's suitable for +// unwinding through a signal handler. This is important primarily for frame- +// pointer based unwinders, DWARF or other unwinders can simply provide the +// normal backtrace function as the implementation here. 
On POSIX, SignalContext +// should be the `ucontext_t` from the signal handler. +typedef size_t (*SegvBacktrace_t)(uintptr_t *TraceBuffer, size_t Size, + void *SignalContext); +SegvBacktrace_t getSegvBacktraceFunction(); + // Install the SIGSEGV crash handler for printing use-after-free and heap- // buffer-{under|over}flow exceptions if the user asked for it. This is platform // specific as even though POSIX and Windows both support registering handlers @@ -67,14 +76,14 @@ PrintBacktrace_t getBasicPrintBacktraceFunction(); // before this function. void installSignalHandlers(gwp_asan::GuardedPoolAllocator *GPA, Printf_t Printf, PrintBacktrace_t PrintBacktrace, - options::Backtrace_t Backtrace); + SegvBacktrace_t SegvBacktrace); void uninstallSignalHandlers(); void dumpReport(uintptr_t ErrorPtr, const gwp_asan::AllocatorState *State, const gwp_asan::AllocationMetadata *Metadata, - options::Backtrace_t Backtrace, Printf_t Printf, - PrintBacktrace_t PrintBacktrace); + SegvBacktrace_t SegvBacktrace, Printf_t Printf, + PrintBacktrace_t PrintBacktrace, void *Context); } // namespace crash_handler } // namespace gwp_asan diff --git a/compiler-rt/lib/gwp_asan/optional/segv_handler_posix.cpp b/compiler-rt/lib/gwp_asan/optional/segv_handler_posix.cpp index 22589b893604f..1bd7a606c2136 100644 --- a/compiler-rt/lib/gwp_asan/optional/segv_handler_posix.cpp +++ b/compiler-rt/lib/gwp_asan/optional/segv_handler_posix.cpp @@ -23,14 +23,14 @@ using gwp_asan::Error; using gwp_asan::GuardedPoolAllocator; using gwp_asan::crash_handler::PrintBacktrace_t; using gwp_asan::crash_handler::Printf_t; -using gwp_asan::options::Backtrace_t; +using gwp_asan::crash_handler::SegvBacktrace_t; struct sigaction PreviousHandler; bool SignalHandlerInstalled; gwp_asan::GuardedPoolAllocator *GPAForSignalHandler; Printf_t PrintfForSignalHandler; PrintBacktrace_t PrintBacktraceForSignalHandler; -Backtrace_t BacktraceForSignalHandler; +SegvBacktrace_t BacktraceForSignalHandler; static void sigSegvHandler(int 
sig, siginfo_t *info, void *ucontext) { if (GPAForSignalHandler) { @@ -40,7 +40,7 @@ static void sigSegvHandler(int sig, siginfo_t *info, void *ucontext) { reinterpret_cast(info->si_addr), GPAForSignalHandler->getAllocatorState(), GPAForSignalHandler->getMetadataRegion(), BacktraceForSignalHandler, - PrintfForSignalHandler, PrintBacktraceForSignalHandler); + PrintfForSignalHandler, PrintBacktraceForSignalHandler, ucontext); } // Process any previous handlers. @@ -138,11 +138,11 @@ PrintBacktrace_t getBasicPrintBacktraceFunction() { void installSignalHandlers(gwp_asan::GuardedPoolAllocator *GPA, Printf_t Printf, PrintBacktrace_t PrintBacktrace, - options::Backtrace_t Backtrace) { + SegvBacktrace_t SegvBacktrace) { GPAForSignalHandler = GPA; PrintfForSignalHandler = Printf; PrintBacktraceForSignalHandler = PrintBacktrace; - BacktraceForSignalHandler = Backtrace; + BacktraceForSignalHandler = SegvBacktrace; struct sigaction Action; Action.sa_sigaction = sigSegvHandler; @@ -160,8 +160,8 @@ void uninstallSignalHandlers() { void dumpReport(uintptr_t ErrorPtr, const gwp_asan::AllocatorState *State, const gwp_asan::AllocationMetadata *Metadata, - options::Backtrace_t Backtrace, Printf_t Printf, - PrintBacktrace_t PrintBacktrace) { + SegvBacktrace_t SegvBacktrace, Printf_t Printf, + PrintBacktrace_t PrintBacktrace, void *Context) { assert(State && "dumpReport missing Allocator State."); assert(Metadata && "dumpReport missing Metadata."); assert(Printf && "dumpReport missing Printf."); @@ -194,7 +194,8 @@ void dumpReport(uintptr_t ErrorPtr, const gwp_asan::AllocatorState *State, // Print the fault backtrace. 
static constexpr unsigned kMaximumStackFramesForCrashTrace = 512; uintptr_t Trace[kMaximumStackFramesForCrashTrace]; - size_t TraceLength = Backtrace(Trace, kMaximumStackFramesForCrashTrace); + size_t TraceLength = + SegvBacktrace(Trace, kMaximumStackFramesForCrashTrace, Context); PrintBacktrace(Trace, TraceLength, Printf); diff --git a/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt b/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt index feac23df9fe5d..f88d90c19d5b9 100644 --- a/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt +++ b/compiler-rt/lib/gwp_asan/tests/CMakeLists.txt @@ -5,7 +5,8 @@ set(GWP_ASAN_UNITTEST_CFLAGS ${COMPILER_RT_GTEST_CFLAGS} -I${COMPILER_RT_SOURCE_DIR}/lib/ -O2 - -g) + -g + -fno-omit-frame-pointer) file(GLOB GWP_ASAN_HEADERS ../*.h) set(GWP_ASAN_UNITTESTS diff --git a/compiler-rt/lib/gwp_asan/tests/harness.h b/compiler-rt/lib/gwp_asan/tests/harness.h index e47254e13c467..d303b2cfa6470 100644 --- a/compiler-rt/lib/gwp_asan/tests/harness.h +++ b/compiler-rt/lib/gwp_asan/tests/harness.h @@ -86,7 +86,8 @@ class BacktraceGuardedPoolAllocator : public ::testing::Test { gwp_asan::crash_handler::installSignalHandlers( &GPA, gwp_asan::test::getPrintfFunction(), - gwp_asan::options::getPrintBacktraceFunction(), Opts.Backtrace); + gwp_asan::options::getPrintBacktraceFunction(), + gwp_asan::crash_handler::getSegvBacktraceFunction()); } void TearDown() override { diff --git a/compiler-rt/lib/scudo/scudo_allocator.cpp b/compiler-rt/lib/scudo/scudo_allocator.cpp index d9023c2f7ab64..343f85a4ef88b 100644 --- a/compiler-rt/lib/scudo/scudo_allocator.cpp +++ b/compiler-rt/lib/scudo/scudo_allocator.cpp @@ -29,6 +29,7 @@ # include "gwp_asan/guarded_pool_allocator.h" # include "gwp_asan/optional/backtrace.h" # include "gwp_asan/optional/options_parser.h" +#include "gwp_asan/optional/segv_handler.h" #endif // GWP_ASAN_HOOKS #include @@ -679,7 +680,8 @@ void initScudo() { if (Opts.InstallSignalHandlers) gwp_asan::crash_handler::installSignalHandlers( &GuardedAlloc, 
__sanitizer::Printf, - gwp_asan::options::getPrintBacktraceFunction(), Opts.Backtrace); + gwp_asan::options::getPrintBacktraceFunction(), + gwp_asan::crash_handler::getSegvBacktraceFunction()); #endif // GWP_ASAN_HOOKS } diff --git a/compiler-rt/lib/scudo/standalone/combined.h b/compiler-rt/lib/scudo/standalone/combined.h index 3bb41eca88f72..ae085befc4f15 100644 --- a/compiler-rt/lib/scudo/standalone/combined.h +++ b/compiler-rt/lib/scudo/standalone/combined.h @@ -192,7 +192,7 @@ class Allocator { if (Opt.InstallSignalHandlers) gwp_asan::crash_handler::installSignalHandlers( &GuardedAlloc, Printf, gwp_asan::options::getPrintBacktraceFunction(), - Opt.Backtrace); + gwp_asan::crash_handler::getSegvBacktraceFunction()); #endif // GWP_ASAN_HOOKS } diff --git a/compiler-rt/test/gwp_asan/backtrace.c b/compiler-rt/test/gwp_asan/backtrace.c new file mode 100644 index 0000000000000..0ba32f85cbf97 --- /dev/null +++ b/compiler-rt/test/gwp_asan/backtrace.c @@ -0,0 +1,29 @@ +// REQUIRES: gwp_asan +// RUN: %clang_gwp_asan %s -g -o %t +// RUN: %expect_crash %t 2>&1 | FileCheck %s + +#include + +__attribute__((noinline)) void *allocate_mem() { return malloc(1); } + +__attribute__((noinline)) void free_mem(void *ptr) { free(ptr); } + +__attribute__((noinline)) void touch_mem(void *ptr) { + volatile char sink = *((volatile char *)ptr); +} + +// CHECK: Use After Free +// CHECK: touch_mem +// CHECK: was deallocated +// CHECK: free_mem +// CHECK: was allocated +// CHECK: allocate_mem + +int main() { + for (unsigned i = 0; i < 0x10000; ++i) { + void *ptr = allocate_mem(); + free_mem(ptr); + touch_mem(ptr); + } + return 0; +} From ec6ada62643cf7cded8160e04cce163323112ade Mon Sep 17 00:00:00 2001 From: Xiangling Liao Date: Thu, 16 Jul 2020 14:29:13 -0400 Subject: [PATCH 674/771] [AIX] report_fatal_error on `-fregister_global_dtors_with_atexit` for static init On AIX, the semantic of global_dtors contains __sterm functions associated with C++ cleanup actions and user-declared 
__attribute__((destructor)) functions. We should never merely register __sterm with atexit(), so currently -fregister_global_dtors_with_atexit does not work well on AIX: It would cause finalization actions to not occur when unloading shared libraries. We need to figure out a way to handle that when we start supporting user-declared __attribute__((destructor)) functions. Currently we report_fatal_error on this option temporarily. Differential Revision: https://reviews.llvm.org/D83974 --- clang/lib/CodeGen/CodeGenModule.cpp | 3 +++ ...aix-sinit-register-global-dtors-with-atexit.cpp | 14 ++++++++++++++ clang/test/Driver/cxa-atexit.cpp | 14 ++++++++++++++ 3 files changed, 31 insertions(+) create mode 100644 clang/test/CodeGenCXX/aix-sinit-register-global-dtors-with-atexit.cpp diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index 4ae8ce7e5ccf1..4c792520b5f37 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -1209,6 +1209,9 @@ void CodeGenModule::AddGlobalCtor(llvm::Function *Ctor, int Priority, /// when the module is unloaded. 
void CodeGenModule::AddGlobalDtor(llvm::Function *Dtor, int Priority) { if (CodeGenOpts.RegisterGlobalDtorsWithAtExit) { + if (getCXXABI().useSinitAndSterm()) + llvm::report_fatal_error( + "register global dtors with atexit() is not supported yet"); DtorsUsingAtExit[Priority].push_back(Dtor); return; } diff --git a/clang/test/CodeGenCXX/aix-sinit-register-global-dtors-with-atexit.cpp b/clang/test/CodeGenCXX/aix-sinit-register-global-dtors-with-atexit.cpp new file mode 100644 index 0000000000000..4cec83d461ade --- /dev/null +++ b/clang/test/CodeGenCXX/aix-sinit-register-global-dtors-with-atexit.cpp @@ -0,0 +1,14 @@ +// RUN: not %clang_cc1 -triple powerpc-ibm-aix-xcoff -S -emit-llvm -x c++ \ +// RUN: -fregister-global-dtors-with-atexit < %s 2>&1 | \ +// RUN: FileCheck %s + +// RUN: not %clang_cc1 -triple powerpc64-ibm-aix-xcoff -S -emit-llvm -x c++ \ +// RUN: -fregister-global-dtors-with-atexit < %s 2>&1 | \ +// RUN: FileCheck %s + +struct T { + T(); + ~T(); +} t; + +// CHECK: error in backend: register global dtors with atexit() is not supported yet diff --git a/clang/test/Driver/cxa-atexit.cpp b/clang/test/Driver/cxa-atexit.cpp index e81af6cd5963d..537a11a35f51b 100644 --- a/clang/test/Driver/cxa-atexit.cpp +++ b/clang/test/Driver/cxa-atexit.cpp @@ -36,6 +36,7 @@ // RUN: FileCheck --check-prefix=WITHATEXIT %s // RUN: %clang -target x86_64-apple-darwin -c -mkernel -### %s 2>&1 | \ // RUN: FileCheck --check-prefix=WITHOUTATEXIT %s + // RUN: %clang -target x86_64-pc-linux-gnu -fregister-global-dtors-with-atexit -fno-register-global-dtors-with-atexit -c -### %s 2>&1 | \ // RUN: FileCheck --check-prefix=WITHOUTATEXIT %s // RUN: %clang -target x86_64-pc-linux-gnu -fno-register-global-dtors-with-atexit -fregister-global-dtors-with-atexit -c -### %s 2>&1 | \ @@ -43,5 +44,18 @@ // RUN: %clang -target x86_64-pc-linux-gnu -c -### %s 2>&1 | \ // RUN: FileCheck --check-prefix=WITHOUTATEXIT %s +// RUN: %clang -target powerpc-ibm-aix-xcoff -fregister-global-dtors-with-atexit 
-fno-register-global-dtors-with-atexit -c -### %s 2>&1 | \ +// RUN: FileCheck --check-prefix=WITHOUTATEXIT %s +// RUN: %clang -target powerpc-ibm-aix-xcoff -fno-register-global-dtors-with-atexit -fregister-global-dtors-with-atexit -c -### %s 2>&1 | \ +// RUN: FileCheck --check-prefix=WITHATEXIT %s +// RUN: %clang -target powerpc-ibm-aix-xcoff -c -### %s 2>&1 | \ +// RUN: FileCheck --check-prefix=WITHOUTATEXIT %s +// RUN: %clang -target powerpc64-ibm-aix-xcoff -fregister-global-dtors-with-atexit -fno-register-global-dtors-with-atexit -c -### %s 2>&1 | \ +// RUN: FileCheck --check-prefix=WITHOUTATEXIT %s +// RUN: %clang -target powerpc64-ibm-aix-xcoff -fno-register-global-dtors-with-atexit -fregister-global-dtors-with-atexit -c -### %s 2>&1 | \ +// RUN: FileCheck --check-prefix=WITHATEXIT %s +// RUN: %clang -target powerpc64-ibm-aix-xcoff -c -### %s 2>&1 | \ +// RUN: FileCheck --check-prefix=WITHOUTATEXIT %s + // WITHATEXIT: -fregister-global-dtors-with-atexit // WITHOUTATEXIT-NOT: -fregister-global-dtors-with-atexit From 029946b112684c27b27f7c2d7554f22b33ae1e0b Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Mon, 13 Jul 2020 22:12:28 -0700 Subject: [PATCH 675/771] [InlineAdvisor] New inliner advisor to replay inlining from optimization remarks Summary: This change added a new inline advisor that takes optimization remarks for previous inlining as input, and provides the decision as advice so current inlining can replay inline decision of a different compilation. Dwarf inline stack with line and discriminator is used as anchor for call sites. The change can be useful for Inliner tuning. A switch -sample-profile-inline-replay= is added to hook up the new inliner advisor with SampleProfileLoader's inline decision for replay. The new inline advisor can also be used by regular CGSCC inliner later if needed.
Reviewers: davidxl, mtrofin, wmi, hoy Subscribers: aprantl, hiraditya, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D83743 --- llvm/include/llvm/Analysis/InlineAdvisor.h | 3 + .../llvm/Analysis/ReplayInlineAdvisor.h | 37 ++++++ llvm/lib/Analysis/CMakeLists.txt | 1 + llvm/lib/Analysis/InlineAdvisor.cpp | 25 ++++ llvm/lib/Analysis/ReplayInlineAdvisor.cpp | 61 +++++++++ llvm/lib/Transforms/IPO/SampleProfile.cpp | 37 +++++- .../SampleProfile/Inputs/inline-replay.txt | 2 + .../Transforms/SampleProfile/inline-replay.ll | 122 ++++++++++++++++++ 8 files changed, 284 insertions(+), 4 deletions(-) create mode 100644 llvm/include/llvm/Analysis/ReplayInlineAdvisor.h create mode 100644 llvm/lib/Analysis/ReplayInlineAdvisor.cpp create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt create mode 100644 llvm/test/Transforms/SampleProfile/inline-replay.ll diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 3480d93385a8e..a0ff09679dfed 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -226,6 +226,9 @@ void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, bool ForProfileContext = false, const char *PassName = nullptr); +/// get call site location as string +StringRef getCallSiteLocation(DebugLoc DLoc); + /// Add location info to ORE message. void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc); diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h new file mode 100644 index 0000000000000..e312d59a9f87b --- /dev/null +++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h @@ -0,0 +1,37 @@ +//===- ReplayInlineAdvisor.h - Replay Inline Advisor interface -*- C++ --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_REPLAYINLINEADVISOR_H_ +#define LLVM_REPLAYINLINEADVISOR_H_ + +#include "llvm/ADT/StringSet.h" +#include "llvm/Analysis/InlineAdvisor.h" +#include "llvm/IR/LLVMContext.h" + +namespace llvm { +class BasicBlock; +class CallBase; +class Function; +class Module; +class OptimizationRemarkEmitter; + +/// Replay inline advisor that uses optimization remarks from inlining of +/// previous build to guide current inlining. This is useful for inliner tuning. +class ReplayInlineAdvisor : public InlineAdvisor { +public: + ReplayInlineAdvisor(FunctionAnalysisManager &FAM, LLVMContext &Context, + StringRef RemarksFile); + std::unique_ptr getAdvice(CallBase &CB) override; + bool areReplayRemarksLoaded() const { return HasReplayRemarks; } + +private: + StringSet<> InlineSitesFromRemarks; + bool HasReplayRemarks = false; +}; +} // namespace llvm +#endif // LLVM_REPLAYINLINEADVISOR_H_ \ No newline at end of file diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 703623396d96a..8f10bac588e52 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -117,6 +117,7 @@ add_llvm_component_library(LLVMAnalysis RegionInfo.cpp RegionPass.cpp RegionPrinter.cpp + ReplayInlineAdvisor.cpp ScalarEvolution.cpp ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionDivision.cpp diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index e18f681278d3a..fedc5282ee646 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -365,6 +365,31 @@ llvm::shouldInline(CallBase &CB, return IC; } +StringRef llvm::getCallSiteLocation(DebugLoc DLoc) { + std::ostringstream CallSiteLoc; + bool First = true; + for (DILocation *DIL = DLoc.get(); DIL; DIL = DIL->getInlinedAt()) { + if (!First) + CallSiteLoc << " @ "; + // Note that 
negative line offset is actually possible, but we use + // unsigned int to match line offset representation in remarks so + // it's directly consumable by replay advisor. + uint32_t Offset = + DIL->getLine() - DIL->getScope()->getSubprogram()->getLine(); + uint32_t Discriminator = DIL->getBaseDiscriminator(); + StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); + if (Name.empty()) + Name = DIL->getScope()->getSubprogram()->getName(); + CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset); + if (Discriminator) { + CallSiteLoc << "." << llvm::utostr(Discriminator); + } + First = false; + } + + return CallSiteLoc.str(); +} + void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) { if (!DLoc.get()) return; diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp new file mode 100644 index 0000000000000..c12b58021a606 --- /dev/null +++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp @@ -0,0 +1,61 @@ +//===- ReplayInlineAdvisor.cpp - Replay InlineAdvisor ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements ReplayInlineAdvisor that replays inline decision based +// on previous inline remarks from optimization remark log.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InlineAdvisor.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/LineIterator.h" + +using namespace llvm; + +#define DEBUG_TYPE "inline-replay" + +ReplayInlineAdvisor::ReplayInlineAdvisor(FunctionAnalysisManager &FAM, + LLVMContext &Context, + StringRef RemarksFile) + : InlineAdvisor(FAM), HasReplayRemarks(false) { + auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile); + std::error_code EC = BufferOrErr.getError(); + if (EC) { + Context.emitError("Could not open remarks file: " + EC.message()); + return; + } + + // Example for inline remarks to parse: + // _Z3subii inlined into main [details] at callsite sum:1 @ main:3.1 + // We use the callsite string after `at callsite` to replay inlining. + line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true); + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef Line = *LineIt; + auto Pair = Line.split(" at callsite "); + if (Pair.second.empty()) + continue; + InlineSitesFromRemarks.insert(Pair.second); + } + HasReplayRemarks = true; +} + +std::unique_ptr ReplayInlineAdvisor::getAdvice(CallBase &CB) { + assert(HasReplayRemarks); + + Function &Caller = *CB.getCaller(); + auto &ORE = FAM.getResult(Caller); + + if (InlineSitesFromRemarks.empty()) + return std::make_unique(this, CB, ORE, false); + + StringRef CallSiteLoc = getCallSiteLocation(CB.getDebugLoc()); + bool InlineRecommended = InlineSitesFromRemarks.count(CallSiteLoc) > 0; + return std::make_unique(this, CB, ORE, InlineRecommended); +} diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index b6871e260532d..7b5fc030cf88c 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -43,6 +43,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" 
#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" @@ -170,6 +171,13 @@ static cl::opt SampleColdCallSiteThreshold( "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites")); +static cl::opt ProfileInlineReplayFile( + "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), + cl::desc( + "Optimization remarks file containing inline remarks to be replayed " + "by inlining from sample profile loader."), + cl::Hidden); + namespace { using BlockWeightMap = DenseMap; @@ -319,7 +327,7 @@ class SampleProfileLoader { RemappingFilename(std::string(RemapName)), IsThinLTOPreLink(IsThinLTOPreLink) {} - bool doInitialization(Module &M); + bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); bool runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG); @@ -473,6 +481,9 @@ class SampleProfileLoader { // overriden by -profile-sample-accurate or profile-sample-accurate // attribute. bool ProfAccForSymsInList; + + // External inline advisor used to replay inline decision from remarks. + std::unique_ptr ExternalInlineAdvisor; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -898,6 +909,16 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { } bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { + if (ExternalInlineAdvisor) { + auto Advice = ExternalInlineAdvisor->getAdvice(CB); + if (!Advice->isInliningRecommended()) { + Advice->recordUnattemptedInlining(); + return false; + } + // Dummy record, we don't use it for replay. 
+ Advice->recordInlining(); + } + Function *CalledFunction = CB.getCalledFunction(); assert(CalledFunction); DebugLoc DLoc = CB.getDebugLoc(); @@ -1005,7 +1026,7 @@ bool SampleProfileLoader::inlineHotFunctions( } } } - if (Hot) { + if (Hot || ExternalInlineAdvisor) { CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true); } else { @@ -1818,7 +1839,8 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { return FunctionOrderList; } -bool SampleProfileLoader::doInitialization(Module &M) { +bool SampleProfileLoader::doInitialization(Module &M, + FunctionAnalysisManager *FAM) { auto &Ctx = M.getContext(); std::unique_ptr RemapReader; @@ -1843,6 +1865,13 @@ bool SampleProfileLoader::doInitialization(Module &M) { NamesInProfile.insert(NameTable->begin(), NameTable->end()); } + if (FAM && !ProfileInlineReplayFile.empty()) { + ExternalInlineAdvisor = std::make_unique( + *FAM, Ctx, ProfileInlineReplayFile); + if (!ExternalInlineAdvisor->areReplayRemarksLoaded()) + ExternalInlineAdvisor.reset(); + } + return true; } @@ -1995,7 +2024,7 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, : ProfileRemappingFileName, IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI); - if (!SampleLoader.doInitialization(M)) + if (!SampleLoader.doInitialization(M, &FAM)) return PreservedAnalyses::all(); ProfileSummaryInfo *PSI = &AM.getResult(M); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt new file mode 100644 index 0000000000000..6842845d56554 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt @@ -0,0 +1,2 @@ +remark: calls.cc:10:0: _Z3sumii inlined into main to match profiling context with (cost=45, threshold=337) at callsite main:3.1 +remark: calls.cc:4:0: _Z3subii inlined into main to match profiling context with (cost=-5, threshold=337) at callsite _Z3sumii:1 @ 
main:3.1 diff --git a/llvm/test/Transforms/SampleProfile/inline-replay.ll b/llvm/test/Transforms/SampleProfile/inline-replay.ll new file mode 100644 index 0000000000000..ecf6f51850f26 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline-replay.ll @@ -0,0 +1,122 @@ +;; Note that this needs new pass manager for now. Passing `-sample-profile-inline-replay` to legacy pass manager is a no-op. + +;; Check baseline inline decisions +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT %s + +;; Check replay inline decisions +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-inline-replay=%S/Inputs/inline-replay.txt -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s + +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %tmp = load i32, i32* %x.addr, align 4, !dbg !8 + %tmp1 = load i32, i32* %y.addr, align 4, !dbg !8 + %add = add nsw i32 %tmp, %tmp1, !dbg !8 + %tmp2 = load i32, i32* %x.addr, align 4, !dbg !8 + %tmp3 = load i32, i32* %y.addr, align 4, !dbg !8 + %call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8 + ret i32 %add, !dbg !8 +} + +define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %tmp = load i32, i32* %x.addr, align 4, !dbg !10 + %tmp1 = load i32, i32* %y.addr, align 4, !dbg !10 + %add = sub nsw i32 %tmp, %tmp1, !dbg !10 + ret i32 %add, !dbg !11 +} + +define i32 @main() #0 !dbg !12 { +entry: + 
%retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !13 + br label %while.cond, !dbg !14 + +while.cond: ; preds = %if.end, %entry + %tmp = load i32, i32* %i, align 4, !dbg !15 + %inc = add nsw i32 %tmp, 1, !dbg !15 + store i32 %inc, i32* %i, align 4, !dbg !15 + %cmp = icmp slt i32 %tmp, 400000000, !dbg !15 + br i1 %cmp, label %while.body, label %while.end, !dbg !15 + +while.body: ; preds = %while.cond + %tmp1 = load i32, i32* %i, align 4, !dbg !17 + %cmp1 = icmp ne i32 %tmp1, 100, !dbg !17 + br i1 %cmp1, label %if.then, label %if.else, !dbg !17 + +if.then: ; preds = %while.body + %tmp2 = load i32, i32* %i, align 4, !dbg !19 + %tmp3 = load i32, i32* %s, align 4, !dbg !19 + %call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19 + store i32 %call, i32* %s, align 4, !dbg !19 + br label %if.end, !dbg !19 + +if.else: ; preds = %while.body + store i32 30, i32* %s, align 4, !dbg !21 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %while.cond, !dbg !23 + +while.end: ; preds = %while.cond + %tmp4 = load i32, i32* %s, align 4, !dbg !25 + %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25 + ret i32 0, !dbg !26 +} + +declare i32 @printf(i8*, ...) 
+ +attributes #0 = { "use-sample-profile" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 1, !"Debug Info Version", i32 3} +!5 = !{!"clang version 3.5 "} +!6 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 4, scope: !6) +!9 = distinct !DISubprogram(name: "sub", linkageName: "_Z3subii", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!10 = !DILocation(line: 20, scope: !9) +!11 = !DILocation(line: 21, scope: !9) +!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!13 = !DILocation(line: 8, scope: !12) +!14 = !DILocation(line: 9, scope: !12) +!15 = !DILocation(line: 9, scope: !16) +!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2) +!17 = !DILocation(line: 10, scope: !18) +!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10) +!19 = !DILocation(line: 10, scope: !20) +!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2) +!21 = !DILocation(line: 10, scope: !22) +!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4) +!23 = !DILocation(line: 10, scope: !24) +!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6) +!25 = !DILocation(line: 11, scope: !12) +!26 = !DILocation(line: 
12, scope: !12) + + +; DEFAULT: _Z3sumii inlined into main +; DEFAULT: _Z3subii inlined into _Z3sumii +; DEFAULT-NOT: _Z3subii inlined into main + +; REPLAY: _Z3sumii inlined into main +; REPLAY: _Z3subii inlined into main +; REPLAY-NOT: _Z3subii inlined into _Z3sumii From c2d69d8d62f2aac941453177e2ae872f5f82feda Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Fri, 17 Jul 2020 22:40:41 +0100 Subject: [PATCH 676/771] Remove clang matrix lowering test for now as it is still failing under the NPM. --- .../test/CodeGen/matrix-lowering-opt-levels.c | 23 ------------------- 1 file changed, 23 deletions(-) delete mode 100644 clang/test/CodeGen/matrix-lowering-opt-levels.c diff --git a/clang/test/CodeGen/matrix-lowering-opt-levels.c b/clang/test/CodeGen/matrix-lowering-opt-levels.c deleted file mode 100644 index 9edecbe46bc83..0000000000000 --- a/clang/test/CodeGen/matrix-lowering-opt-levels.c +++ /dev/null @@ -1,23 +0,0 @@ -// RUN: %clang -O1 -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s -// RUN: %clang -O2 -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s -// RUN: %clang -O3 -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s -// RUN: %clang -Ofast -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s -// RUN: %clang -Os -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s -// RUN: %clang -Oz -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s - -// Smoke test that the matrix intrinsics are lowered at any optimisation level.
- -// FIXME: this fails with the NPM: -// -// %clang -O0 -fenable-matrix -S -emit-llvm %s -o - | FileCheck %s - -typedef float m4x4_t __attribute__((matrix_type(4, 4))); - -m4x4_t f(m4x4_t a, m4x4_t b, m4x4_t c) { - // - // CHECK-LABEL: f( - // CHECK-NOT: @llvm.matrix - // CHECK: } - // - return a + b * c; -} From ea4758a125298cc25639007509a8012f2f71fb00 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Fri, 17 Jul 2020 11:24:29 -0700 Subject: [PATCH 677/771] [flang] Rework read/write permission management for runtime file opening Anonymous Fortran unit files (e.g., "./fort.7") need to be created O_RDWR so that they can be written, rewound, and read. Other files opened with no ACTION= specifier need to set read/write permissions based on the file, if it exists. Reviewed By: sscalpone Differential Revision: https://reviews.llvm.org/D84063 --- flang/runtime/file.cpp | 110 +++++++++++++++++++++++--------------- flang/runtime/file.h | 8 ++- flang/runtime/io-api.cpp | 18 +++---- flang/runtime/io-stmt.cpp | 3 +- flang/runtime/io-stmt.h | 4 +- flang/runtime/unit.cpp | 15 ++---- flang/runtime/unit.h | 4 +- 7 files changed, 92 insertions(+), 70 deletions(-) diff --git a/flang/runtime/file.cpp b/flang/runtime/file.cpp index 19c86a9d4b82f..341702df995b8 100644 --- a/flang/runtime/file.cpp +++ b/flang/runtime/file.cpp @@ -57,63 +57,86 @@ static int openfile_mkstemp(IoErrorHandler &handler) { return fd; } -void OpenFile::Open( - OpenStatus status, Position position, IoErrorHandler &handler) { - int flags{mayRead_ ? mayWrite_ ? 
O_RDWR : O_RDONLY : O_WRONLY}; - switch (status) { - case OpenStatus::Old: - if (fd_ >= 0) { - return; +void OpenFile::Open(OpenStatus status, std::optional action, + Position position, IoErrorHandler &handler) { + if (fd_ >= 0 && + (status == OpenStatus::Old || status == OpenStatus::Unknown)) { + return; + } + if (fd_ >= 0) { + if (fd_ <= 2) { + // don't actually close a standard file descriptor, we might need it + } else { + if (::close(fd_) != 0) { + handler.SignalErrno(); + } } - knownSize_.reset(); - break; - case OpenStatus::New: - flags |= O_CREAT | O_EXCL; - knownSize_ = 0; - break; - case OpenStatus::Scratch: + fd_ = -1; + } + if (status == OpenStatus::Scratch) { if (path_.get()) { handler.SignalError("FILE= must not appear with STATUS='SCRATCH'"); path_.reset(); } + if (!action) { + action = Action::ReadWrite; + } fd_ = openfile_mkstemp(handler); - knownSize_ = 0; - return; - case OpenStatus::Replace: - flags |= O_CREAT | O_TRUNC; - knownSize_ = 0; - break; - case OpenStatus::Unknown: - if (fd_ >= 0) { + } else { + if (!path_.get()) { + handler.SignalError( + "FILE= is required unless STATUS='OLD' and unit is connected"); return; } - flags |= O_CREAT; - knownSize_.reset(); - break; - } - // If we reach this point, we're opening a new file. - // TODO: Fortran shouldn't create a new file until the first WRITE. 
- if (fd_ >= 0) { - if (fd_ <= 2) { - // don't actually close a standard file descriptor, we might need it - } else if (::close(fd_) != 0) { - handler.SignalErrno(); + int flags{0}; + if (status != OpenStatus::Old) { + flags |= O_CREAT; + } + if (status == OpenStatus::New) { + flags |= O_EXCL; + } else if (status == OpenStatus::Replace) { + flags |= O_TRUNC; + } + if (!action) { + // Try to open read/write, back off to read-only on failure + fd_ = ::open(path_.get(), flags | O_RDWR, 0600); + if (fd_ >= 0) { + action = Action::ReadWrite; + } else { + action = Action::Read; + } + } + if (fd_ < 0) { + switch (*action) { + case Action::Read: + flags |= O_RDONLY; + break; + case Action::Write: + flags |= O_WRONLY; + break; + case Action::ReadWrite: + flags |= O_RDWR; + break; + } + fd_ = ::open(path_.get(), flags, 0600); + if (fd_ < 0) { + handler.SignalErrno(); + } } } - if (!path_.get()) { - handler.SignalError( - "FILE= is required unless STATUS='OLD' and unit is connected"); - return; - } - fd_ = ::open(path_.get(), flags, 0600); - if (fd_ < 0) { - handler.SignalErrno(); - } + RUNTIME_CHECK(handler, action.has_value()); pending_.reset(); if (position == Position::Append && !RawSeekToEnd()) { handler.SignalErrno(); } isTerminal_ = ::isatty(fd_) == 1; + mayRead_ = *action != Action::Write; + mayWrite_ = *action != Action::Read; + if (status == OpenStatus::Old || status == OpenStatus::Unknown) { + knownSize_.reset(); + } else { + knownSize_ = 0; + } } void OpenFile::Predefine(int fd) { @@ -124,6 +147,9 @@ void OpenFile::Predefine(int fd) { knownSize_.reset(); nextId_ = 0; pending_.reset(); + mayRead_ = fd == 0; + mayWrite_ = fd != 0; + mayPosition_ = false; } void OpenFile::Close(CloseStatus status, IoErrorHandler &handler) { diff --git a/flang/runtime/file.h b/flang/runtime/file.h index 17a5e910ecae8..1d25a91558a4c 100644 --- a/flang/runtime/file.h +++ b/flang/runtime/file.h @@ -21,6 +21,7 @@ namespace Fortran::runtime::io { enum class OpenStatus { Old, New, Scratch, 
Replace, Unknown }; enum class CloseStatus { Keep, Delete }; enum class Position { AsIs, Rewind, Append }; +enum class Action { Read, Write, ReadWrite }; class OpenFile { public: @@ -30,19 +31,16 @@ class OpenFile { void set_path(OwningPtr &&, std::size_t bytes); std::size_t pathLength() const { return pathLength_; } bool mayRead() const { return mayRead_; } - void set_mayRead(bool yes) { mayRead_ = yes; } bool mayWrite() const { return mayWrite_; } - void set_mayWrite(bool yes) { mayWrite_ = yes; } + bool mayPosition() const { return mayPosition_; } bool mayAsynchronous() const { return mayAsynchronous_; } void set_mayAsynchronous(bool yes) { mayAsynchronous_ = yes; } - bool mayPosition() const { return mayPosition_; } - void set_mayPosition(bool yes) { mayPosition_ = yes; } FileOffset position() const { return position_; } bool isTerminal() const { return isTerminal_; } std::optional knownSize() const { return knownSize_; } bool IsOpen() const { return fd_ >= 0; } - void Open(OpenStatus, Position, IoErrorHandler &); + void Open(OpenStatus, std::optional, Position, IoErrorHandler &); void Predefine(int fd); void Close(CloseStatus, IoErrorHandler &); diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index 0bd827bc53aa2..2f077e1f9ff8f 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -563,31 +563,31 @@ bool IONAME(SetAction)(Cookie cookie, const char *keyword, std::size_t length) { io.GetIoErrorHandler().Crash( "SetAction() called when not in an OPEN statement"); } - bool mayRead{true}; - bool mayWrite{true}; + std::optional action; static const char *keywords[]{"READ", "WRITE", "READWRITE", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { case 0: - mayWrite = false; + action = Action::Read; break; case 1: - mayRead = false; + action = Action::Write; break; case 2: + action = Action::ReadWrite; break; default: open->SignalError(IostatErrorInKeyword, "Invalid ACTION='%.*s'", static_cast(length), keyword); return 
false; } - if (mayRead != open->unit().mayRead() || - mayWrite != open->unit().mayWrite()) { - if (open->wasExtant()) { + RUNTIME_CHECK(io.GetIoErrorHandler(), action.has_value()); + if (open->wasExtant()) { + if ((*action != Action::Write) != open->unit().mayRead() || + (*action != Action::Read) != open->unit().mayWrite()) { open->SignalError("ACTION= may not be changed on an open unit"); } - open->unit().set_mayRead(mayRead); - open->unit().set_mayWrite(mayWrite); } + open->set_action(*action); return true; } diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 0681da215d1e4..70fb3f9350bc8 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -165,7 +165,8 @@ int OpenStatementState::EndIoStatement() { if (wasExtant_ && status_ != OpenStatus::Old) { SignalError("OPEN statement for connected unit must have STATUS='OLD'"); } - unit().OpenUnit(status_, position_, std::move(path_), pathLength_, *this); + unit().OpenUnit( + status_, action_, position_, std::move(path_), pathLength_, *this); return ExternalIoStatementBase::EndIoStatement(); } diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index 066391bd15664..6f5ca2c481128 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -294,15 +294,17 @@ class OpenStatementState : public ExternalIoStatementBase { : ExternalIoStatementBase{unit, sourceFile, sourceLine}, wasExtant_{ wasExtant} {} bool wasExtant() const { return wasExtant_; } - void set_status(OpenStatus status) { status_ = status; } + void set_status(OpenStatus status) { status_ = status; } // STATUS= void set_path(const char *, std::size_t, int kind); // FILE= void set_position(Position position) { position_ = position; } // POSITION= + void set_action(Action action) { action_ = action; } // ACTION= int EndIoStatement(); private: bool wasExtant_; OpenStatus status_{OpenStatus::Unknown}; Position position_{Position::AsIs}; + std::optional action_; OwningPtr path_; std::size_t pathLength_; }; 
diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index cf20d7cd81c66..c6af53e6ec223 100644 --- a/flang/runtime/unit.cpp +++ b/flang/runtime/unit.cpp @@ -64,7 +64,8 @@ ExternalFileUnit &ExternalFileUnit::LookUpOrCreateAnonymous( IoErrorHandler handler{terminator}; result.OpenUnit( dir == Direction::Input ? OpenStatus::Old : OpenStatus::Replace, - Position::Rewind, std::move(path), std::strlen(path.get()), handler); + Action::ReadWrite, Position::Rewind, std::move(path), + std::strlen(path.get()), handler); result.isUnformatted = isUnformatted; } return result; @@ -87,8 +88,8 @@ int ExternalFileUnit::NewUnit(const Terminator &terminator) { return GetUnitMap().NewUnit(terminator).unitNumber(); } -void ExternalFileUnit::OpenUnit(OpenStatus status, Position position, - OwningPtr &&newPath, std::size_t newPathLength, +void ExternalFileUnit::OpenUnit(OpenStatus status, std::optional action, + Position position, OwningPtr &&newPath, std::size_t newPathLength, IoErrorHandler &handler) { if (IsOpen()) { if (status == OpenStatus::Old && @@ -105,7 +106,7 @@ void ExternalFileUnit::OpenUnit(OpenStatus status, Position position, Close(CloseStatus::Keep, handler); } set_path(std::move(newPath), newPathLength); - Open(status, position, handler); + Open(status, action, position, handler); auto totalBytes{knownSize()}; if (access == Access::Direct) { if (!isFixedRecordLength || !recordLength) { @@ -186,16 +187,10 @@ UnitMap &ExternalFileUnit::GetUnitMap() { unitMap = New{terminator}().release(); ExternalFileUnit &out{ExternalFileUnit::CreateNew(6, terminator)}; out.Predefine(1); - out.set_mayRead(false); - out.set_mayWrite(true); - out.set_mayPosition(false); out.SetDirection(Direction::Output, handler); defaultOutput = &out; ExternalFileUnit &in{ExternalFileUnit::CreateNew(5, terminator)}; in.Predefine(0); - in.set_mayRead(true); - in.set_mayWrite(false); - in.set_mayPosition(false); in.SetDirection(Direction::Input, handler); defaultInput = ∈ // TODO: Set UTF-8 mode 
from the environment diff --git a/flang/runtime/unit.h b/flang/runtime/unit.h index f0edeedef0812..d2d2dce035f14 100644 --- a/flang/runtime/unit.h +++ b/flang/runtime/unit.h @@ -48,8 +48,8 @@ class ExternalFileUnit : public ConnectionState, static void CloseAll(IoErrorHandler &); static void FlushAll(IoErrorHandler &); - void OpenUnit(OpenStatus, Position, OwningPtr &&path, - std::size_t pathLength, IoErrorHandler &); + void OpenUnit(OpenStatus, std::optional, Position, + OwningPtr &&path, std::size_t pathLength, IoErrorHandler &); void CloseUnit(CloseStatus, IoErrorHandler &); void DestroyClosed(); From db15b8ab90793e3707f72e7668c3844922fdf356 Mon Sep 17 00:00:00 2001 From: Teresa Johnson Date: Fri, 17 Jul 2020 14:48:28 -0700 Subject: [PATCH 678/771] [compiler-rt][asan][hwasan] Fix Fuchsia build Fix build failure in Fuchsia build from refactoring in 5d2be1a18845c528d3e86f7efcc59872e4a757c3 Guard the moved versions of ReserveShadowMemoryRange and ProtectGap the same way they were in the asan code originally (not for Fuchsia or RTEMS). Otherwise we end up with unsats as they invoke functions not defined there. --- compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp index ddd688bb2dca4..047c5a17ea6e7 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_libcdep.cpp @@ -139,6 +139,8 @@ uptr ReservedAddressRange::InitAligned(uptr size, uptr align, return start; } +#if !SANITIZER_FUCHSIA && !SANITIZER_RTEMS + // Reserve memory range [beg, end]. // We need to use inclusive range because end+1 may not be representable. 
void ReserveShadowMemoryRange(uptr beg, uptr end, const char *name, @@ -188,6 +190,8 @@ void ProtectGap(uptr addr, uptr size, uptr zero_base_shadow_start, Die(); } +#endif // !SANITIZER_FUCHSIA && !SANITIZER_RTEMS + } // namespace __sanitizer SANITIZER_INTERFACE_WEAK_DEF(void, __sanitizer_sandbox_on_notify, From 09fe0c5ab9ca5846d6cb625e9be47fdcfe49b93c Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Fri, 17 Jul 2020 17:54:01 -0400 Subject: [PATCH 679/771] [OpenMP] Add Additional Function Attribute Information to OMPKinds.def Summary: This patch adds more function attribute information to the runtime function definitions in OMPKinds.def. The goal is to provide sufficient information about OpenMP runtime functions to perform more optimizations on OpenMP code. Reviewers: jdoerfert Subscribers: aaron.ballman cfe-commits yaxunl guansong sstefan1 llvm-commits Tags: #OpenMP #clang #llvm Differential Revision: https://reviews.llvm.org/D81031 --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 669 +++++++++------- llvm/test/Transforms/OpenMP/add_attributes.ll | 745 ++++++++++++------ .../Transforms/OpenMP/parallel_deletion.ll | 2 +- 3 files changed, 896 insertions(+), 520 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 0dc2b34f2e4d6..bb476f6a34d4e 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -383,7 +383,8 @@ __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32, - Int8Ptr, Int32, Int8Ptr) + /* kmp_task_t */ VoidPtr, Int32, + /* kmp_task_affinity_info_t */ VoidPtr) __OMP_RTL(omp_get_thread_num, false, Int32, ) __OMP_RTL(omp_get_num_threads, false, Int32, ) @@ -430,8 +431,7 @@ 
__OMP_RTL(__kmpc_reduce, false, Int32, IdentPtr, Int32, Int32, SizeTy, VoidPtr, ReduceFunctionPtr, KmpCriticalNamePtrTy) __OMP_RTL(__kmpc_reduce_nowait, false, Int32, IdentPtr, Int32, Int32, SizeTy, VoidPtr, ReduceFunctionPtr, KmpCriticalNamePtrTy) -__OMP_RTL(__kmpc_end_reduce, false, Void, IdentPtr, Int32, - KmpCriticalNamePtrTy) +__OMP_RTL(__kmpc_end_reduce, false, Void, IdentPtr, Int32, KmpCriticalNamePtrTy) __OMP_RTL(__kmpc_end_reduce_nowait, false, Void, IdentPtr, Int32, KmpCriticalNamePtrTy) @@ -514,10 +514,10 @@ __OMP_RTL(__kmpc_taskloop, false, Void, IdentPtr, /* Int */ Int32, VoidPtr, /* Int */ Int32, Int64, VoidPtr) __OMP_RTL(__kmpc_omp_target_task_alloc, false, /* kmp_task_t */ VoidPtr, IdentPtr, Int32, Int32, SizeTy, SizeTy, TaskRoutineEntryPtr, Int64) -__OMP_RTL(__kmpc_taskred_modifier_init, false, VoidPtr, IdentPtr, - /* Int */ Int32, /* Int */ Int32, /* Int */ Int32, VoidPtr) -__OMP_RTL(__kmpc_taskred_init, false, VoidPtr, /* Int */ Int32, - /* Int */ Int32, VoidPtr) +__OMP_RTL(__kmpc_taskred_modifier_init, false, /* kmp_taskgroup */ VoidPtr, + IdentPtr, /* Int */ Int32, /* Int */ Int32, /* Int */ Int32, VoidPtr) +__OMP_RTL(__kmpc_taskred_init, false, /* kmp_taskgroup */ VoidPtr, + /* Int */ Int32, /* Int */ Int32, VoidPtr) __OMP_RTL(__kmpc_task_reduction_modifier_fini, false, Void, IdentPtr, /* Int */ Int32, /* Int */ Int32) __OMP_RTL(__kmpc_task_reduction_get_th_data, false, VoidPtr, Int32, VoidPtr, @@ -594,7 +594,9 @@ __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL #undef OMP_RTL +#define ParamAttrs(...) ArrayRef({__VA_ARGS__}) #define EnumAttr(Kind) Attribute::get(Ctx, Attribute::AttrKind::Kind) +#define EnumAttrInt(Kind, N) Attribute::get(Ctx, Attribute::AttrKind::Kind, N) #define AttributeSet(...) \ AttributeSet::get(Ctx, ArrayRef({__VA_ARGS__})) @@ -607,19 +609,88 @@ __OMP_RTL(__last, false, Void, ) __OMP_ATTRS_SET(GetterAttrs, OptimisticAttributes ? 
AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly), - EnumAttr(NoSync), EnumAttr(NoFree), EnumAttr(InaccessibleMemOnly)) + EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(InaccessibleMemOnly), + EnumAttr(WillReturn)) : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(GetterArgWriteAttrs, OptimisticAttributes ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(NoFree), EnumAttr(InaccessibleMemOrArgMemOnly)) + EnumAttr(NoFree), + EnumAttr(InaccessibleMemOrArgMemOnly), + EnumAttr(WillReturn)) : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(SetterAttrs, OptimisticAttributes ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(WriteOnly), - EnumAttr(NoSync), EnumAttr(NoFree), EnumAttr(InaccessibleMemOnly)) + EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(InaccessibleMemOnly), + EnumAttr(WillReturn)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(DefaultAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(WillReturn), EnumAttr(NoFree)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(BarrierAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(InaccessibleArgOnlyAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(InaccessibleMemOrArgMemOnly), + EnumAttr(WillReturn), EnumAttr(NoFree)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(InaccessibleOnlyAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(InaccessibleMemOnly), + EnumAttr(WillReturn), EnumAttr(NoFree)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(AllocAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(WillReturn)) : AttributeSet(EnumAttr(NoUnwind))) +__OMP_ATTRS_SET(ForkAttrs, OptimisticAttributes + ? 
AttributeSet(EnumAttr(NoUnwind)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(ReadOnlyPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoFree), + EnumAttr(NoCapture)) + : AttributeSet()) + +__OMP_ATTRS_SET(WriteOnlyPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(WriteOnly), EnumAttr(NoFree), + EnumAttr(NoCapture)) + : AttributeSet()) + +__OMP_ATTRS_SET(ArgPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoCapture), EnumAttr(NoFree)) + : AttributeSet()) + +__OMP_ATTRS_SET(ReturnPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoAlias)) + : AttributeSet()) + +__OMP_ATTRS_SET(ReturnAlignedPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoAlias), EnumAttrInt(Alignment, 8), + EnumAttrInt(DereferenceableOrNull, 8)) + : AttributeSet()) + #undef __OMP_ATTRS_SET #undef OMP_ATTRS_SET @@ -630,295 +701,309 @@ __OMP_ATTRS_SET(SetterAttrs, #define __OMP_RTL_ATTRS(Name, FnAttrSet, RetAttrSet, ArgAttrSets) \ OMP_RTL_ATTRS(OMPRTL_##Name, FnAttrSet, RetAttrSet, ArgAttrSets) -__OMP_RTL_ATTRS(__kmpc_barrier, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_cancel, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_cancel_barrier, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_flush, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_taskwait, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_taskyield, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_num_threads, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_proc_bind, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) 
-__OMP_RTL_ATTRS(__kmpc_serialized_parallel, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_schedule, GetterArgWriteAttrs, AttributeSet(), - ArrayRef( - {AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)), - AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly))})) -__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_flush, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_fork_call, ForkAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_taskwait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_taskyield, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_push_num_threads, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) 
+__OMP_RTL_ATTRS(__kmpc_push_proc_bind, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_serialized_parallel, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_reg_task_with_affinity, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs, + AttributeSet(), ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS( + omp_get_schedule, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)), + AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)))) +__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_get_supported_active_levels, GetterAttrs, AttributeSet(), - {}) -__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_num_places, 
GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), {}) + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_get_place_proc_ids, GetterArgWriteAttrs, AttributeSet(), - ArrayRef({AttributeSet(), - AttributeSet(EnumAttr(NoCapture), - EnumAttr(WriteOnly))})) -__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), {}) - -__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_master, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_master, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_critical, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_critical_with_hint, - 
AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_critical, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_begin, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_reduce, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_reduce_nowait, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_reduce, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_reduce_nowait, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_ordered, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_ordered, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_for_static_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_fini, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_4u, 
AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_single, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_single, 
AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_omp_task_alloc, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_taskgroup, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskgroup, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task_begin_if0, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task_complete_if0, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task_with_deps, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskloop, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_target_task_alloc, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskred_modifier_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskred_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_fini, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_get_th_data, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_proxy_task_completed_ooo, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_omp_wait_deps, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_cancellationpoint, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_fork_teams, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) 
-__OMP_RTL_ATTRS(__kmpc_push_num_teams, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_copyprivate, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_threadprivate_cached, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_threadprivate_register, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_doacross_init, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_doacross_post, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_doacross_wait, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_doacross_fini, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_alloc, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_free, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_init_allocator, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_nowait_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_teams_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_teams_nowait_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_register_requires, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) 
-__OMP_RTL_ATTRS(__tgt_target_data_end_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_update_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_mapper_num_components, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_push_mapper_component, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) + ParamAttrs(AttributeSet(), AttributeSet(EnumAttr(NoCapture), + EnumAttr(WriteOnly)))) +__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), + ParamAttrs()) + +__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), + ParamAttrs()) + +__OMP_RTL_ATTRS(__kmpc_master, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_master, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_critical, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_critical_with_hint, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + 
AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_critical, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) + +__OMP_RTL_ATTRS(__kmpc_begin, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_reduce, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ReadOnlyPtrAttrs, AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_reduce_nowait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ReadOnlyPtrAttrs, AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_reduce, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_reduce_nowait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) + +__OMP_RTL_ATTRS(__kmpc_ordered, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_ordered, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_for_static_init_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_init_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_init_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_init_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), 
AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_fini, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, 
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4u, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8u, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8u, GetterArgWriteAttrs, + AttributeSet(), + 
ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_single, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_single, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_omp_task_alloc, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_taskgroup, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_taskgroup, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task_begin_if0, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task_complete_if0, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task_with_deps, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ReadOnlyPtrAttrs, AttributeSet(), + ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_taskloop, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet(), AttributeSet(), + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_omp_target_task_alloc, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs, + AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_taskred_modifier_init, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_taskred_init, DefaultAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_fini, BarrierAttrs, + 
AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_task_reduction_get_th_data, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_task_reduction_init, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_init, DefaultAttrs, + ReturnPtrAttrs, ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_proxy_task_completed_ooo, DefaultAttrs, AttributeSet(), + ParamAttrs()) + +__OMP_RTL_ATTRS(__kmpc_omp_wait_deps, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_cancellationpoint, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_fork_teams, ForkAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_push_num_teams, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_copyprivate, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_threadprivate_cached, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_threadprivate_register, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs, + ReadOnlyPtrAttrs, ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_doacross_init, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_doacross_post, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_doacross_wait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, {}) +__OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {}) + 
+__OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {}) +__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, SetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_teams_nowait_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_register_requires, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs, + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_end_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, ForkAttrs, + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_update_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs, + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_mapper_num_components, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs, + ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs)) #undef __OMP_RTL_ATTRS #undef OMP_RTL_ATTRS #undef AttributeSet #undef EnumAttr +#undef EnumAttrInt +#undef ParamAttrs ///} diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index 6e10613ed43c3..e92447d79feac 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -890,373 +890,373 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; CHECK: ; Function Attrs: nounwind ; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #0 -; CHECK-NOT: Function Attrs -; CHECK: declare void 
@__kmpc_barrier(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) #0 -; CHECK-NOT: Function Attrs -; CHECK: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) #0 -; CHECK-NOT: Function Attrs -; CHECK: declare void @__kmpc_flush(%struct.ident_t*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
#0 -; CHECK-NOT: Function Attrs -; CHECK: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: 
inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, 
i32, [8 x i32]*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void 
@__kmpc_for_static_fini(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, 
i32*, i32*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void 
@__kmpc_taskgroup(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void 
@__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, 
i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
+; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* 
@__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void 
@__kmpc_destroy_allocator(i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_register_requires(i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_register_requires(i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; 
CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare 
i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) #0 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_dynamic(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_nested(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_max_active_levels(i32) -; OPTIMISTIC: ; 
Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_schedule(i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() #1 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @use_int(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: 
declare dso_local i32 @omp_in_parallel() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare 
dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) #2 ; OPTIMISTIC-NOT: Function Attrs @@ -1418,8 +1418,299 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_pause_resource_all(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...) 
+ +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t* nocapture nofree readonly, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t* nocapture nofree readonly, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t* nocapture nofree readonly, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_master(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void 
@__kmpc_end_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_begin(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_end(%struct.ident_t* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* 
nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void 
@__kmpc_team_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_omp_task_alloc(%struct.ident_t* nocapture nofree 
readonly, i32, i32, i64, i64, i32 (i32, i8*)* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t* nocapture nofree readonly, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i32, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t* nocapture nofree 
readonly, i32, i32, i32, i32, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; 
OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t* nocapture nofree readonly, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t* nocapture nofree readonly, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i8* nocapture nofree readonly, i32, i8* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t* nocapture nofree readonly, i32, i32, i8* nocapture nofree readonly, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t* nocapture nofree readonly, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t* nocapture nofree readonly, i32, i32, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...) 
+ +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_taskloop(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i64* nocapture nofree, i64* nocapture nofree, i64, i32, i32, i64, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i32 (i32, i8*)* nocapture nofree readonly, i64) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_taskred_modifier_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* nocapture nofree readonly, i32, i32) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t* nocapture nofree readonly, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, i32) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_threadprivate_cached(%struct.ident_t* nocapture nofree readonly, i32, i8*, i64, i8***) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t* nocapture nofree readonly, i8*, i8* (i8*)* nocapture nofree readonly, i8* (i8*, i8*)* nocapture nofree readonly, void (i8*)* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void 
@__kmpc_doacross_wait(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_alloc(i32, i64, i8*) + +; OPTIMISTIC: ; Function Attrs: nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_free(i32, i8*, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) + +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_register_requires(i64) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, 
i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i64 @__tgt_mapper_num_components(i8*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_allow_completion_event(%struct.ident_t* nocapture nofree readonly, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_init(i32, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll index 4d2f8e7cbc5e8..07976660546f8 100644 
--- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -393,7 +393,7 @@ entry: define internal void @.omp.reduction.reduction_func(i8* %arg, i8* %arg1) { ; CHECK-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func -; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #10 +; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #{{[0-9]+}} ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32** ; CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 From ae08dbc67326eaebefef7f4401767ddf7583c9b0 Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 17 Jul 2020 14:58:01 -0700 Subject: [PATCH 680/771] Temporarily Revert "[InlineAdvisor] New inliner advisor to replay inlining from optimization remarks" as it is failing the inline-replay.ll test as well as sanitizers/Werror from returning a stack local variable. This reverts commit 029946b112684c27b27f7c2d7554f22b33ae1e0b. 
--- llvm/include/llvm/Analysis/InlineAdvisor.h | 3 - .../llvm/Analysis/ReplayInlineAdvisor.h | 37 ------ llvm/lib/Analysis/CMakeLists.txt | 1 - llvm/lib/Analysis/InlineAdvisor.cpp | 25 ---- llvm/lib/Analysis/ReplayInlineAdvisor.cpp | 61 --------- llvm/lib/Transforms/IPO/SampleProfile.cpp | 37 +----- .../SampleProfile/Inputs/inline-replay.txt | 2 - .../Transforms/SampleProfile/inline-replay.ll | 122 ------------------ 8 files changed, 4 insertions(+), 284 deletions(-) delete mode 100644 llvm/include/llvm/Analysis/ReplayInlineAdvisor.h delete mode 100644 llvm/lib/Analysis/ReplayInlineAdvisor.cpp delete mode 100644 llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt delete mode 100644 llvm/test/Transforms/SampleProfile/inline-replay.ll diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index a0ff09679dfed..3480d93385a8e 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -226,9 +226,6 @@ void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, bool ForProfileContext = false, const char *PassName = nullptr); -/// get call site location as string -StringRef getCallSiteLocation(DebugLoc DLoc); - /// Add location info to ORE message. void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc); diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h deleted file mode 100644 index e312d59a9f87b..0000000000000 --- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h +++ /dev/null @@ -1,37 +0,0 @@ -//===- ReplayInlineAdvisor.h - Replay Inline Advisor interface -*- C++ --*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -#ifndef LLVM_REPLAYINLINEADVISOR_H_ -#define LLVM_REPLAYINLINEADVISOR_H_ - -#include "llvm/ADT/StringSet.h" -#include "llvm/Analysis/InlineAdvisor.h" -#include "llvm/IR/LLVMContext.h" - -namespace llvm { -class BasicBlock; -class CallBase; -class Function; -class Module; -class OptimizationRemarkEmitter; - -/// Replay inline advisor that uses optimization remarks from inlining of -/// previous build to guide current inlining. This is useful for inliner tuning. -class ReplayInlineAdvisor : public InlineAdvisor { -public: - ReplayInlineAdvisor(FunctionAnalysisManager &FAM, LLVMContext &Context, - StringRef RemarksFile); - std::unique_ptr getAdvice(CallBase &CB) override; - bool areReplayRemarksLoaded() const { return HasReplayRemarks; } - -private: - StringSet<> InlineSitesFromRemarks; - bool HasReplayRemarks = false; -}; -} // namespace llvm -#endif // LLVM_REPLAYINLINEADVISOR_H_ \ No newline at end of file diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 8f10bac588e52..703623396d96a 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -117,7 +117,6 @@ add_llvm_component_library(LLVMAnalysis RegionInfo.cpp RegionPass.cpp RegionPrinter.cpp - ReplayInlineAdvisor.cpp ScalarEvolution.cpp ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionDivision.cpp diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index fedc5282ee646..e18f681278d3a 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -365,31 +365,6 @@ llvm::shouldInline(CallBase &CB, return IC; } -StringRef llvm::getCallSiteLocation(DebugLoc DLoc) { - std::ostringstream CallSiteLoc; - bool First = true; - for (DILocation *DIL = DLoc.get(); DIL; DIL = DIL->getInlinedAt()) { - if (!First) - CallSiteLoc << " @ "; - // Note that 
negative line offset is actually possible, but we use - // unsigned int to match line offset representation in remarks so - // it's directly consumable by relay advisor. - uint32_t Offset = - DIL->getLine() - DIL->getScope()->getSubprogram()->getLine(); - uint32_t Discriminator = DIL->getBaseDiscriminator(); - StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); - if (Name.empty()) - Name = DIL->getScope()->getSubprogram()->getName(); - CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset); - if (Discriminator) { - CallSiteLoc << "." << llvm::utostr(Discriminator); - } - First = false; - } - - return CallSiteLoc.str(); -} - void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) { if (!DLoc.get()) return; diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp deleted file mode 100644 index c12b58021a606..0000000000000 --- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp +++ /dev/null @@ -1,61 +0,0 @@ -//===- ReplayInlineAdvisor.cpp - Replay InlineAdvisor ---------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements ReplayInlineAdvisor that replays inline decision based -// on previous inline remarks from optimization remark log. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/InlineAdvisor.h" -#include "llvm/Analysis/ReplayInlineAdvisor.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/LineIterator.h" - -using namespace llvm; - -#define DEBUG_TYPE "inline-replay" - -ReplayInlineAdvisor::ReplayInlineAdvisor(FunctionAnalysisManager &FAM, - LLVMContext &Context, - StringRef RemarksFile) - : InlineAdvisor(FAM), HasReplayRemarks(false) { - auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile); - std::error_code EC = BufferOrErr.getError(); - if (EC) { - Context.emitError("Could not open remarks file: " + EC.message()); - return; - } - - // Example for inline remarks to parse: - // _Z3subii inlined into main [details] at callsite sum:1 @ main:3.1 - // We use the callsite string after `at callsite` to replay inlining. - line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true); - for (; !LineIt.is_at_eof(); ++LineIt) { - StringRef Line = *LineIt; - auto Pair = Line.split(" at callsite "); - if (Pair.second.empty()) - continue; - InlineSitesFromRemarks.insert(Pair.second); - } - HasReplayRemarks = true; -} - -std::unique_ptr ReplayInlineAdvisor::getAdvice(CallBase &CB) { - assert(HasReplayRemarks); - - Function &Caller = *CB.getCaller(); - auto &ORE = FAM.getResult(Caller); - - if (InlineSitesFromRemarks.empty()) - return std::make_unique(this, CB, ORE, false); - - StringRef CallSiteLoc = getCallSiteLocation(CB.getDebugLoc()); - bool InlineRecommended = InlineSitesFromRemarks.count(CallSiteLoc) > 0; - return std::make_unique(this, CB, ORE, InlineRecommended); -} diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 7b5fc030cf88c..b6871e260532d 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -43,7 +43,6 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" 
#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" @@ -171,13 +170,6 @@ static cl::opt SampleColdCallSiteThreshold( "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites")); -static cl::opt ProfileInlineReplayFile( - "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), - cl::desc( - "Optimization remarks file containing inline remarks to be replayed " - "by inlining from sample profile loader."), - cl::Hidden); - namespace { using BlockWeightMap = DenseMap; @@ -327,7 +319,7 @@ class SampleProfileLoader { RemappingFilename(std::string(RemapName)), IsThinLTOPreLink(IsThinLTOPreLink) {} - bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); + bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG); @@ -481,9 +473,6 @@ class SampleProfileLoader { // overriden by -profile-sample-accurate or profile-sample-accurate // attribute. bool ProfAccForSymsInList; - - // External inline advisor used to replay inline decision from remarks. - std::unique_ptr ExternalInlineAdvisor; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -909,16 +898,6 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { } bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { - if (ExternalInlineAdvisor) { - auto Advice = ExternalInlineAdvisor->getAdvice(CB); - if (!Advice->isInliningRecommended()) { - Advice->recordUnattemptedInlining(); - return false; - } - // Dummy record, we don't use it for replay. 
- Advice->recordInlining(); - } - Function *CalledFunction = CB.getCalledFunction(); assert(CalledFunction); DebugLoc DLoc = CB.getDebugLoc(); @@ -1026,7 +1005,7 @@ bool SampleProfileLoader::inlineHotFunctions( } } } - if (Hot || ExternalInlineAdvisor) { + if (Hot) { CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true); } else { @@ -1839,8 +1818,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { return FunctionOrderList; } -bool SampleProfileLoader::doInitialization(Module &M, - FunctionAnalysisManager *FAM) { +bool SampleProfileLoader::doInitialization(Module &M) { auto &Ctx = M.getContext(); std::unique_ptr RemapReader; @@ -1865,13 +1843,6 @@ bool SampleProfileLoader::doInitialization(Module &M, NamesInProfile.insert(NameTable->begin(), NameTable->end()); } - if (FAM && !ProfileInlineReplayFile.empty()) { - ExternalInlineAdvisor = std::make_unique( - *FAM, Ctx, ProfileInlineReplayFile); - if (!ExternalInlineAdvisor->areReplayRemarksLoaded()) - ExternalInlineAdvisor.reset(); - } - return true; } @@ -2024,7 +1995,7 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, : ProfileRemappingFileName, IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI); - if (!SampleLoader.doInitialization(M, &FAM)) + if (!SampleLoader.doInitialization(M)) return PreservedAnalyses::all(); ProfileSummaryInfo *PSI = &AM.getResult(M); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt deleted file mode 100644 index 6842845d56554..0000000000000 --- a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt +++ /dev/null @@ -1,2 +0,0 @@ -remark: calls.cc:10:0: _Z3sumii inlined into main to match profiling context with (cost=45, threshold=337) at callsite main:3.1 -remark: calls.cc:4:0: _Z3subii inlined into main to match profiling context with (cost=-5, threshold=337) at callsite _Z3sumii:1 @ 
main:3.1 diff --git a/llvm/test/Transforms/SampleProfile/inline-replay.ll b/llvm/test/Transforms/SampleProfile/inline-replay.ll deleted file mode 100644 index ecf6f51850f26..0000000000000 --- a/llvm/test/Transforms/SampleProfile/inline-replay.ll +++ /dev/null @@ -1,122 +0,0 @@ -;; Note that this needs new pass manager for now. Passing `-sample-profile-inline-replay` to legacy pass manager is a no-op. - -;; Check baseline inline decisions -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT %s - -;; Check replay inline decisions -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-inline-replay=%S/Inputs/inline-replay.txt -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s - -@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 - -define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 { -entry: - %x.addr = alloca i32, align 4 - %y.addr = alloca i32, align 4 - store i32 %x, i32* %x.addr, align 4 - store i32 %y, i32* %y.addr, align 4 - %tmp = load i32, i32* %x.addr, align 4, !dbg !8 - %tmp1 = load i32, i32* %y.addr, align 4, !dbg !8 - %add = add nsw i32 %tmp, %tmp1, !dbg !8 - %tmp2 = load i32, i32* %x.addr, align 4, !dbg !8 - %tmp3 = load i32, i32* %y.addr, align 4, !dbg !8 - %call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8 - ret i32 %add, !dbg !8 -} - -define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 { -entry: - %x.addr = alloca i32, align 4 - %y.addr = alloca i32, align 4 - store i32 %x, i32* %x.addr, align 4 - store i32 %y, i32* %y.addr, align 4 - %tmp = load i32, i32* %x.addr, align 4, !dbg !10 - %tmp1 = load i32, i32* %y.addr, align 4, !dbg !10 - %add = sub nsw i32 %tmp, %tmp1, !dbg !10 - ret i32 %add, !dbg !11 -} - -define i32 @main() #0 !dbg !12 { -entry: 
- %retval = alloca i32, align 4 - %s = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 0, i32* %retval - store i32 0, i32* %i, align 4, !dbg !13 - br label %while.cond, !dbg !14 - -while.cond: ; preds = %if.end, %entry - %tmp = load i32, i32* %i, align 4, !dbg !15 - %inc = add nsw i32 %tmp, 1, !dbg !15 - store i32 %inc, i32* %i, align 4, !dbg !15 - %cmp = icmp slt i32 %tmp, 400000000, !dbg !15 - br i1 %cmp, label %while.body, label %while.end, !dbg !15 - -while.body: ; preds = %while.cond - %tmp1 = load i32, i32* %i, align 4, !dbg !17 - %cmp1 = icmp ne i32 %tmp1, 100, !dbg !17 - br i1 %cmp1, label %if.then, label %if.else, !dbg !17 - -if.then: ; preds = %while.body - %tmp2 = load i32, i32* %i, align 4, !dbg !19 - %tmp3 = load i32, i32* %s, align 4, !dbg !19 - %call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19 - store i32 %call, i32* %s, align 4, !dbg !19 - br label %if.end, !dbg !19 - -if.else: ; preds = %while.body - store i32 30, i32* %s, align 4, !dbg !21 - br label %if.end - -if.end: ; preds = %if.else, %if.then - br label %while.cond, !dbg !23 - -while.end: ; preds = %while.cond - %tmp4 = load i32, i32* %s, align 4, !dbg !25 - %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25 - ret i32 0, !dbg !26 -} - -declare i32 @printf(i8*, ...) 
- -attributes #0 = { "use-sample-profile" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4} -!llvm.ident = !{!5} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2) -!1 = !DIFile(filename: "calls.cc", directory: ".") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 1, !"Debug Info Version", i32 3} -!5 = !{!"clang version 3.5 "} -!6 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -!7 = !DISubroutineType(types: !2) -!8 = !DILocation(line: 4, scope: !6) -!9 = distinct !DISubprogram(name: "sub", linkageName: "_Z3subii", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -!10 = !DILocation(line: 20, scope: !9) -!11 = !DILocation(line: 21, scope: !9) -!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -!13 = !DILocation(line: 8, scope: !12) -!14 = !DILocation(line: 9, scope: !12) -!15 = !DILocation(line: 9, scope: !16) -!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2) -!17 = !DILocation(line: 10, scope: !18) -!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10) -!19 = !DILocation(line: 10, scope: !20) -!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2) -!21 = !DILocation(line: 10, scope: !22) -!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4) -!23 = !DILocation(line: 10, scope: !24) -!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6) -!25 = !DILocation(line: 11, scope: !12) -!26 = !DILocation(line: 
12, scope: !12) - - -; DEFAULT: _Z3sumii inlined into main -; DEFAULT: _Z3subii inlined into _Z3sumii -; DEFAULT-NOT: _Z3subii inlined into main - -; REPLAY: _Z3sumii inlined into main -; REPLAY: _Z3subii inlined into main -; REPLA-NOT: _Z3subii inlined into _Z3sumii From 020545d386cf70b6c6cd2fb1d1c6bf0090afe4aa Mon Sep 17 00:00:00 2001 From: Eric Christopher Date: Fri, 17 Jul 2020 15:05:42 -0700 Subject: [PATCH 681/771] Temporarily Revert "[OpenMP] Add Additional Function Attribute Information to OMPKinds.def" as it's causing a few unused variable warnings via the macro instantiation: sources/llvm-project/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def:649:17: error: unused variable 'InaccessibleOnlyAttrs' [-Werror,-Wunused-variable] __OMP_ATTRS_SET(InaccessibleOnlyAttrs, ^ This reverts commit 09fe0c5ab9ca5846d6cb625e9be47fdcfe49b93c. --- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 669 +++++++--------- llvm/test/Transforms/OpenMP/add_attributes.ll | 745 ++++++------------ .../Transforms/OpenMP/parallel_deletion.ll | 2 +- 3 files changed, 520 insertions(+), 896 deletions(-) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index bb476f6a34d4e..0dc2b34f2e4d6 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -383,8 +383,7 @@ __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32, - /* kmp_task_t */ VoidPtr, Int32, - /* kmp_task_affinity_info_t */ VoidPtr) + Int8Ptr, Int32, Int8Ptr) __OMP_RTL(omp_get_thread_num, false, Int32, ) __OMP_RTL(omp_get_num_threads, false, Int32, ) @@ -431,7 +430,8 @@ __OMP_RTL(__kmpc_reduce, false, Int32, IdentPtr, Int32, Int32, SizeTy, VoidPtr, ReduceFunctionPtr, 
KmpCriticalNamePtrTy) __OMP_RTL(__kmpc_reduce_nowait, false, Int32, IdentPtr, Int32, Int32, SizeTy, VoidPtr, ReduceFunctionPtr, KmpCriticalNamePtrTy) -__OMP_RTL(__kmpc_end_reduce, false, Void, IdentPtr, Int32, KmpCriticalNamePtrTy) +__OMP_RTL(__kmpc_end_reduce, false, Void, IdentPtr, Int32, + KmpCriticalNamePtrTy) __OMP_RTL(__kmpc_end_reduce_nowait, false, Void, IdentPtr, Int32, KmpCriticalNamePtrTy) @@ -514,10 +514,10 @@ __OMP_RTL(__kmpc_taskloop, false, Void, IdentPtr, /* Int */ Int32, VoidPtr, /* Int */ Int32, Int64, VoidPtr) __OMP_RTL(__kmpc_omp_target_task_alloc, false, /* kmp_task_t */ VoidPtr, IdentPtr, Int32, Int32, SizeTy, SizeTy, TaskRoutineEntryPtr, Int64) -__OMP_RTL(__kmpc_taskred_modifier_init, false, /* kmp_taskgroup */ VoidPtr, - IdentPtr, /* Int */ Int32, /* Int */ Int32, /* Int */ Int32, VoidPtr) -__OMP_RTL(__kmpc_taskred_init, false, /* kmp_taskgroup */ VoidPtr, - /* Int */ Int32, /* Int */ Int32, VoidPtr) +__OMP_RTL(__kmpc_taskred_modifier_init, false, VoidPtr, IdentPtr, + /* Int */ Int32, /* Int */ Int32, /* Int */ Int32, VoidPtr) +__OMP_RTL(__kmpc_taskred_init, false, VoidPtr, /* Int */ Int32, + /* Int */ Int32, VoidPtr) __OMP_RTL(__kmpc_task_reduction_modifier_fini, false, Void, IdentPtr, /* Int */ Int32, /* Int */ Int32) __OMP_RTL(__kmpc_task_reduction_get_th_data, false, VoidPtr, Int32, VoidPtr, @@ -594,9 +594,7 @@ __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL #undef OMP_RTL -#define ParamAttrs(...) ArrayRef({__VA_ARGS__}) #define EnumAttr(Kind) Attribute::get(Ctx, Attribute::AttrKind::Kind) -#define EnumAttrInt(Kind, N) Attribute::get(Ctx, Attribute::AttrKind::Kind, N) #define AttributeSet(...) \ AttributeSet::get(Ctx, ArrayRef({__VA_ARGS__})) @@ -609,88 +607,19 @@ __OMP_RTL(__last, false, Void, ) __OMP_ATTRS_SET(GetterAttrs, OptimisticAttributes ? 
AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly), - EnumAttr(NoSync), EnumAttr(NoFree), - EnumAttr(InaccessibleMemOnly), - EnumAttr(WillReturn)) + EnumAttr(NoSync), EnumAttr(NoFree), EnumAttr(InaccessibleMemOnly)) : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(GetterArgWriteAttrs, OptimisticAttributes ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(NoFree), - EnumAttr(InaccessibleMemOrArgMemOnly), - EnumAttr(WillReturn)) + EnumAttr(NoFree), EnumAttr(InaccessibleMemOrArgMemOnly)) : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(SetterAttrs, OptimisticAttributes ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(WriteOnly), - EnumAttr(NoSync), EnumAttr(NoFree), - EnumAttr(InaccessibleMemOnly), - EnumAttr(WillReturn)) - : AttributeSet(EnumAttr(NoUnwind))) - -__OMP_ATTRS_SET(DefaultAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(WillReturn), EnumAttr(NoFree)) - : AttributeSet(EnumAttr(NoUnwind))) - -__OMP_ATTRS_SET(BarrierAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind)) - : AttributeSet(EnumAttr(NoUnwind))) - -__OMP_ATTRS_SET(InaccessibleArgOnlyAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(InaccessibleMemOrArgMemOnly), - EnumAttr(WillReturn), EnumAttr(NoFree)) - : AttributeSet(EnumAttr(NoUnwind))) - -__OMP_ATTRS_SET(InaccessibleOnlyAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(InaccessibleMemOnly), - EnumAttr(WillReturn), EnumAttr(NoFree)) - : AttributeSet(EnumAttr(NoUnwind))) - -__OMP_ATTRS_SET(AllocAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(WillReturn)) + EnumAttr(NoSync), EnumAttr(NoFree), EnumAttr(InaccessibleMemOnly)) : AttributeSet(EnumAttr(NoUnwind))) -__OMP_ATTRS_SET(ForkAttrs, OptimisticAttributes - ? 
AttributeSet(EnumAttr(NoUnwind)) - : AttributeSet(EnumAttr(NoUnwind))) - -__OMP_ATTRS_SET(ReadOnlyPtrAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoFree), - EnumAttr(NoCapture)) - : AttributeSet()) - -__OMP_ATTRS_SET(WriteOnlyPtrAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(WriteOnly), EnumAttr(NoFree), - EnumAttr(NoCapture)) - : AttributeSet()) - -__OMP_ATTRS_SET(ArgPtrAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoCapture), EnumAttr(NoFree)) - : AttributeSet()) - -__OMP_ATTRS_SET(ReturnPtrAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoAlias)) - : AttributeSet()) - -__OMP_ATTRS_SET(ReturnAlignedPtrAttrs, - OptimisticAttributes - ? AttributeSet(EnumAttr(NoAlias), EnumAttrInt(Alignment, 8), - EnumAttrInt(DereferenceableOrNull, 8)) - : AttributeSet()) - #undef __OMP_ATTRS_SET #undef OMP_ATTRS_SET @@ -701,309 +630,295 @@ __OMP_ATTRS_SET(ReturnAlignedPtrAttrs, #define __OMP_RTL_ATTRS(Name, FnAttrSet, RetAttrSet, ArgAttrSets) \ OMP_RTL_ATTRS(OMPRTL_##Name, FnAttrSet, RetAttrSet, ArgAttrSets) -__OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_flush, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_fork_call, ForkAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_omp_taskwait, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_omp_taskyield, InaccessibleArgOnlyAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_push_num_threads, InaccessibleArgOnlyAttrs, - AttributeSet(), 
ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_push_proc_bind, InaccessibleArgOnlyAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_serialized_parallel, InaccessibleArgOnlyAttrs, - AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, InaccessibleArgOnlyAttrs, - AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_omp_reg_task_with_affinity, DefaultAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs, - AttributeSet(), ReadOnlyPtrAttrs)) - -__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS( - omp_get_schedule, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)), - AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)))) -__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_barrier, AttributeSet(), AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_cancel, + AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_cancel_barrier, AttributeSet(), AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_flush, AttributeSet(), AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_omp_taskwait, AttributeSet(), AttributeSet(), {}) 
+__OMP_RTL_ATTRS(__kmpc_omp_taskyield, + AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_push_num_threads, + AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_push_proc_bind, + AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_serialized_parallel, + AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, + AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_schedule, GetterArgWriteAttrs, AttributeSet(), + ArrayRef( + {AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)), + AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly))})) +__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(omp_get_supported_active_levels, GetterAttrs, AttributeSet(), - ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), - ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), - ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, 
AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), ParamAttrs()) + {}) +__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), {}) __OMP_RTL_ATTRS(omp_get_place_proc_ids, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(AttributeSet(), AttributeSet(EnumAttr(NoCapture), - EnumAttr(WriteOnly)))) -__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), - ParamAttrs()) -__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), - ParamAttrs()) - -__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), - ParamAttrs()) - -__OMP_RTL_ATTRS(__kmpc_master, InaccessibleArgOnlyAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_end_master, InaccessibleArgOnlyAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_critical, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), 
AttributeSet())) -__OMP_RTL_ATTRS(__kmpc_critical_with_hint, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - AttributeSet())) -__OMP_RTL_ATTRS(__kmpc_end_critical, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) - -__OMP_RTL_ATTRS(__kmpc_begin, DefaultAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_end, DefaultAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) - -__OMP_RTL_ATTRS(__kmpc_reduce, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - AttributeSet(), ReadOnlyPtrAttrs, AttributeSet())) -__OMP_RTL_ATTRS(__kmpc_reduce_nowait, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - AttributeSet(), ReadOnlyPtrAttrs, AttributeSet())) -__OMP_RTL_ATTRS(__kmpc_end_reduce, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) -__OMP_RTL_ATTRS(__kmpc_end_reduce_nowait, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) - -__OMP_RTL_ATTRS(__kmpc_ordered, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_end_ordered, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) - -__OMP_RTL_ATTRS(__kmpc_for_static_init_4, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, - AttributeSet(), AttributeSet())) -__OMP_RTL_ATTRS(__kmpc_for_static_init_4u, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, - AttributeSet(), AttributeSet())) -__OMP_RTL_ATTRS(__kmpc_for_static_init_8, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, - 
AttributeSet(), AttributeSet())) -__OMP_RTL_ATTRS(__kmpc_for_static_init_8u, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, - AttributeSet(), AttributeSet())) -__OMP_RTL_ATTRS(__kmpc_for_static_fini, InaccessibleArgOnlyAttrs, - AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, GetterArgWriteAttrs, - AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4u, GetterArgWriteAttrs, - AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8, GetterArgWriteAttrs, - AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8u, GetterArgWriteAttrs, - AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_4, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_4u, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_8, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_8u, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_4, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_4u, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_8, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, - ArgPtrAttrs, 
ArgPtrAttrs, ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_8u, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4, InaccessibleArgOnlyAttrs, - AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4u, InaccessibleArgOnlyAttrs, - AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8, InaccessibleArgOnlyAttrs, - AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8u, InaccessibleArgOnlyAttrs, - AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_team_static_init_4, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_team_static_init_4u, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_team_static_init_8, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_team_static_init_8u, GetterArgWriteAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4, GetterArgWriteAttrs, - AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, - ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4u, GetterArgWriteAttrs, - AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, - ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8, GetterArgWriteAttrs, - AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - 
ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, - ArgPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8u, GetterArgWriteAttrs, - AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, - ArgPtrAttrs)) - -__OMP_RTL_ATTRS(__kmpc_single, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_end_single, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) - -__OMP_RTL_ATTRS(__kmpc_omp_task_alloc, DefaultAttrs, ReturnPtrAttrs, - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_omp_task, DefaultAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) -__OMP_RTL_ATTRS(__kmpc_end_taskgroup, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_taskgroup, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_omp_task_begin_if0, DefaultAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_omp_task_complete_if0, DefaultAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_omp_task_with_deps, DefaultAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - AttributeSet(), ReadOnlyPtrAttrs, AttributeSet(), - ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_taskloop, DefaultAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - AttributeSet(), ArgPtrAttrs, ArgPtrAttrs, - AttributeSet(), AttributeSet(), AttributeSet(), - AttributeSet(), AttributeSet())) -__OMP_RTL_ATTRS(__kmpc_omp_target_task_alloc, DefaultAttrs, ReturnPtrAttrs, - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs, - AttributeSet())) -__OMP_RTL_ATTRS(__kmpc_taskred_modifier_init, DefaultAttrs, ReturnPtrAttrs, - ParamAttrs(ReadOnlyPtrAttrs)) 
-__OMP_RTL_ATTRS(__kmpc_taskred_init, DefaultAttrs, AttributeSet(), ParamAttrs()) -__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_fini, BarrierAttrs, - AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_task_reduction_get_th_data, DefaultAttrs, ReturnPtrAttrs, - ParamAttrs()) -__OMP_RTL_ATTRS(__kmpc_task_reduction_init, DefaultAttrs, ReturnPtrAttrs, - ParamAttrs()) -__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_init, DefaultAttrs, - ReturnPtrAttrs, ParamAttrs()) -__OMP_RTL_ATTRS(__kmpc_proxy_task_completed_ooo, DefaultAttrs, AttributeSet(), - ParamAttrs()) - -__OMP_RTL_ATTRS(__kmpc_omp_wait_deps, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_cancellationpoint, DefaultAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) - -__OMP_RTL_ATTRS(__kmpc_fork_teams, ForkAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_push_num_teams, InaccessibleArgOnlyAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) - -__OMP_RTL_ATTRS(__kmpc_copyprivate, DefaultAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), - ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_threadprivate_cached, DefaultAttrs, ReturnPtrAttrs, - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_threadprivate_register, DefaultAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs, - ReadOnlyPtrAttrs, ReadOnlyPtrAttrs)) - -__OMP_RTL_ATTRS(__kmpc_doacross_init, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_doacross_post, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_doacross_wait, BarrierAttrs, AttributeSet(), - ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) -__OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(), - 
ParamAttrs(ReadOnlyPtrAttrs)) - -__OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, {}) -__OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {}) -__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_teams_nowait_mapper, ForkAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_register_requires, ForkAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, ForkAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs, - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_end_mapper, ForkAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, ForkAttrs, - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_update_mapper, ForkAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs, - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_mapper_num_components, ForkAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs, - ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs)) + ArrayRef({AttributeSet(), + AttributeSet(EnumAttr(NoCapture), + EnumAttr(WriteOnly))})) +__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), {}) + +__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), {}) 
+__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_master, + AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_end_master, + AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_critical, + AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_critical_with_hint, + AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_end_critical, + AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_begin, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_end, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_reduce, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_reduce_nowait, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_end_reduce, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_end_reduce_nowait, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_ordered, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_end_ordered, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_for_static_init_4, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_for_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_for_static_init_8, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_for_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_for_static_fini, AttributeSet(EnumAttr(NoUnwind)), + 
AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_4, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_4u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_8, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_8u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_4, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_4u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_8, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_8u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_team_static_init_4, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_team_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_team_static_init_8, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_team_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), 
{}) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_single, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_end_single, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_omp_task_alloc, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_omp_task, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_end_taskgroup, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_taskgroup, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_omp_task_begin_if0, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_omp_task_complete_if0, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_omp_task_with_deps, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_taskloop, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_omp_target_task_alloc, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_taskred_modifier_init, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_taskred_init, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_fini, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_task_reduction_get_th_data, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_task_reduction_init, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), 
{}) +__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_init, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_proxy_task_completed_ooo, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_omp_wait_deps, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_cancellationpoint, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_fork_teams, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_push_num_teams, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_copyprivate, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_threadprivate_cached, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_threadprivate_register, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_doacross_init, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_doacross_post, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_doacross_wait, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_doacross_fini, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_alloc, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_free, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_init_allocator, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_mapper, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_nowait_mapper, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) 
+__OMP_RTL_ATTRS(__tgt_target_teams_mapper, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_teams_nowait_mapper, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_register_requires, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_end_mapper, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_update_mapper, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_mapper_num_components, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_push_mapper_component, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, + AttributeSet(EnumAttr(NoUnwind)), + AttributeSet(), {}) #undef __OMP_RTL_ATTRS #undef OMP_RTL_ATTRS #undef AttributeSet #undef EnumAttr -#undef EnumAttrInt -#undef ParamAttrs ///} diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index e92447d79feac..6e10613ed43c3 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -890,373 +890,373 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; CHECK: ; Function Attrs: nounwind ; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #0 -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) #0 +; CHECK-NOT: Function Attrs +; CHECK: 
declare void @__kmpc_barrier(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) #0 +; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) #0 +; CHECK-NOT: Function Attrs +; CHECK: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) #0 +; CHECK-NOT: Function Attrs +; CHECK: declare void @__kmpc_flush(%struct.ident_t*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
-; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) #0 +; CHECK-NOT: Function Attrs +; CHECK: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) #0 +; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) #0 +; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) #0 +; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: 
declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) #0 +; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) #0 +; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) #0 +; CHECK: Function Attrs: inaccessiblemem_or_argmemonly +; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) -; 
CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void 
@__kmpc_for_static_fini(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, 
i32*, i32*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void 
@__kmpc_taskgroup(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void 
@__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, 
i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
#0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i8* 
@__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void 
@__kmpc_destroy_allocator(i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_register_requires(i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_register_requires(i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) -; CHECK: ; Function Attrs: nounwind -; 
CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare 
i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) -; CHECK: ; Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) #0 +; CHECK: Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_dynamic(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_nested(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_max_active_levels(i32) -; OPTIMISTIC: ; Function 
Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_schedule(i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() #1 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @use_int(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare 
dso_local i32 @omp_in_parallel() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind ; OPTIMISTIC-NEXT: declare dso_local 
void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) #2 ; OPTIMISTIC-NOT: Function Attrs @@ -1418,299 +1418,8 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_pause_resource_all(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t* nocapture nofree readonly, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t* nocapture nofree readonly, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t* nocapture nofree readonly, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree 
nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_master(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*, i32) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_end_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_begin(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_end(%struct.ident_t* nocapture nofree readonly) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: 
declare void @__kmpc_end_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void 
@__kmpc_team_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t* nocapture nofree 
readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_omp_task_alloc(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i32 (i32, i8*)* nocapture nofree readonly) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t* nocapture nofree readonly, i32, i8*) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32) - -; 
OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i32, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i32, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture 
nofree, i32* nocapture nofree) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t* nocapture nofree readonly, i32, i8*) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t* nocapture nofree readonly, i32, i8*) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t* nocapture nofree readonly, i32, i8*, 
i32, i8* nocapture nofree readonly, i32, i8* nocapture nofree readonly) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t* nocapture nofree readonly, i32, i32, i8* nocapture nofree readonly, i32, i8*) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t* nocapture nofree readonly, i32, i32) - -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t* nocapture nofree readonly, i32, i32, i32) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_taskloop(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i64* nocapture nofree, i64* nocapture nofree, i64, i32, i32, i64, i8*) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i32 (i32, i8*)* nocapture nofree readonly, i64) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_taskred_modifier_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i8*) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* nocapture nofree readonly, i32, i32) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void 
@__kmpc_copyprivate(%struct.ident_t* nocapture nofree readonly, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, i32) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_threadprivate_cached(%struct.ident_t* nocapture nofree readonly, i32, i8*, i64, i8***) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t* nocapture nofree readonly, i8*, i8* (i8*)* nocapture nofree readonly, i8* (i8*, i8*)* nocapture nofree readonly, void (i8*)* nocapture nofree readonly) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i8*) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t* nocapture nofree readonly, i32) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_alloc(i32, i64, i8*) - -; OPTIMISTIC: ; Function Attrs: nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_free(i32, i8*, i8*) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) - -; OPTIMISTIC: ; Function Attrs: nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) - -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly -; OPTIMISTIC-NEXT: declare void 
@__kmpc_push_target_tripcount(i64, i64) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_register_requires(i64) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare i64 @__tgt_mapper_num_components(i8*) - -; OPTIMISTIC: ; Function Attrs: nounwind -; OPTIMISTIC-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; 
OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_allow_completion_event(%struct.ident_t* nocapture nofree readonly, i32, i8*) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_init(i32, i32, i8*) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) - -; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn -; OPTIMISTIC-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll index 07976660546f8..4d2f8e7cbc5e8 100644 --- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -393,7 +393,7 @@ entry: define internal void @.omp.reduction.reduction_func(i8* %arg, i8* %arg1) { ; CHECK-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func -; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #{{[0-9]+}} +; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #10 ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32** ; CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 From 7fcc1bb4b654461c3109b01e1fe7eae191a86f7f Mon Sep 17 00:00:00 2001 From: Michael Spencer Date: Fri, 17 Jul 2020 16:12:18 -0600 Subject: [PATCH 682/771] 
[clangd] Fix the build with clang <3.9. In clang <3.9 the `unique_ptr` constructor that is supposed to allow for Derived to Base conversion does not work. Remove this if we drop support for such configurations. This is the same fix as in fda901a987ddd, and it updates the comments to better reflect the actual issue. The same thing reproduces with libc++ with older clangs. --- clang-tools-extra/clangd/ConfigProvider.cpp | 6 +++++- llvm/utils/TableGen/OptParserEmitter.cpp | 16 ++++++++-------- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/clang-tools-extra/clangd/ConfigProvider.cpp b/clang-tools-extra/clangd/ConfigProvider.cpp index eec1ae9921947..a56cdd755322a 100644 --- a/clang-tools-extra/clangd/ConfigProvider.cpp +++ b/clang-tools-extra/clangd/ConfigProvider.cpp @@ -209,7 +209,11 @@ Provider::combine(std::vector Providers) { }; auto Result = std::make_unique(); Result->Providers = std::move(Providers); - return Result; + // FIXME: This is a workaround for a bug in older versions of clang (< 3.9) + // The constructor that is supposed to allow for Derived to Base + // conversion does not work. Remove this if we drop support for such + // configurations. + return std::unique_ptr(Result.release()); } Config Provider::getConfig(const Params &P, DiagnosticCallback DC) const { diff --git a/llvm/utils/TableGen/OptParserEmitter.cpp b/llvm/utils/TableGen/OptParserEmitter.cpp index 34699b55e274b..6e49e248e4b81 100644 --- a/llvm/utils/TableGen/OptParserEmitter.cpp +++ b/llvm/utils/TableGen/OptParserEmitter.cpp @@ -110,10 +110,10 @@ class MarshallingFlagInfo final : public MarshallingKindInfo { static std::unique_ptr create(const Record &R) { std::unique_ptr Ret(new MarshallingFlagInfo(R)); Ret->IsPositive = R.getValueAsBit("IsPositive"); - // FIXME: This is a workaround for a bug in older versions of libstdc++ when - // compiled with Clang. The constructor that is supposed to allow for - // Derived to Base conversion does not work. 
Remove this if we drop - // support for such configurations. + // FIXME: This is a workaround for a bug in older versions of clang (< 3.9) + // The constructor that is supposed to allow for Derived to Base + // conversion does not work. Remove this if we drop support for such + // configurations. return std::unique_ptr(Ret.release()); } @@ -208,10 +208,10 @@ struct SimpleEnumValueTable { "values"); } - // FIXME: This is a workaround for a bug in older versions of libstdc++ when - // compiled with Clang. The constructor that is supposed to allow for - // Derived to Base conversion does not work. Remove this if we drop - // support for such configurations. + // FIXME: This is a workaround for a bug in older versions of clang (< 3.9) + // The constructor that is supposed to allow for Derived to Base + // conversion does not work. Remove this if we drop support for such + // configurations. return std::unique_ptr(Ret.release()); } From 86fb2db49b33aa4759d351b30ea1f6ebbe252c60 Mon Sep 17 00:00:00 2001 From: AndreyChurbanov Date: Sat, 18 Jul 2020 01:15:37 +0300 Subject: [PATCH 683/771] [OpenMP] libomp cleanup: check presence of hwloc objects CORE, PACKAGE hwloc documentation guarantees the only object that is always present in the topology is PU. We can check the presence of other objects in the topology, just in case. Differential Revision: https://reviews.llvm.org/D84065 --- openmp/runtime/src/kmp_affinity.cpp | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index 47e70477ced6c..f78288710349a 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -577,11 +577,17 @@ static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os, // Hack to try and infer the machine topology using only the data // available from cpuid on the current thread, and __kmp_xproc. 
KMP_ASSERT(__kmp_affinity_type == affinity_none); - - nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj( - hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE); - __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj( - hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU); + // hwloc only guarantees existance of PU object, so check PACKAGE and CORE + hwloc_obj_t o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PACKAGE, 0); + if (o != NULL) + nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_CORE); + else + nCoresPerPkg = 1; // no PACKAGE found + o = hwloc_get_obj_by_type(tp, HWLOC_OBJ_CORE, 0); + if (o != NULL) + __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(o, HWLOC_OBJ_PU); + else + __kmp_nThreadsPerCore = 1; // no CORE found __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore; nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; if (__kmp_affinity_verbose) { From cf2274b779f5ffee476cfe40994e6963a51c6428 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Fri, 17 Jul 2020 11:21:08 -0700 Subject: [PATCH 684/771] [flang] Allow ! and // comments after some preprocessing directives Old-style C /*comments*/ are omitted from preprocessor directive token sequences by the prescanner, but line-ending C++ and Fortran free-form comments are not since their handling might depend on the directive. Add code to skip these line-ending comments as appropriate in place of existing code that just skipped blanks. 
Reviewed By: sscalpone Differential Revision: https://reviews.llvm.org/D84061 --- flang/lib/Parser/preprocessor.cpp | 16 +++++++--------- flang/lib/Parser/token-sequence.cpp | 25 +++++++++++++++++++++++++ flang/lib/Parser/token-sequence.h | 4 ++++ flang/test/Parser/pp-dir-comments.f90 | 19 +++++++++++++++++++ 4 files changed, 55 insertions(+), 9 deletions(-) create mode 100644 flang/test/Parser/pp-dir-comments.f90 diff --git a/flang/lib/Parser/preprocessor.cpp b/flang/lib/Parser/preprocessor.cpp index 3b09597ddeb75..a1f07967d9b08 100644 --- a/flang/lib/Parser/preprocessor.cpp +++ b/flang/lib/Parser/preprocessor.cpp @@ -453,10 +453,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), "# missing or invalid name"_err_en_US); } else { - j = dir.SkipBlanks(j + 1); - if (j != tokens) { + if (dir.IsAnythingLeft(++j)) { prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), - "#undef: excess tokens at end of directive"_err_en_US); + "#undef: excess tokens at end of directive"_en_US); } else { definitions_.erase(nameToken); } @@ -468,8 +467,7 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { dir.GetIntervalProvenanceRange(dirOffset, tokens - dirOffset), "#%s: missing name"_err_en_US, dirName); } else { - j = dir.SkipBlanks(j + 1); - if (j != tokens) { + if (dir.IsAnythingLeft(++j)) { prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), "#%s: excess tokens at end of directive"_en_US, dirName); } @@ -489,9 +487,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { dir.GetTokenProvenanceRange(dirOffset)); } } else if (dirName == "else") { - if (j != tokens) { + if (dir.IsAnythingLeft(j)) { prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), - "#else: excess tokens at end of directive"_err_en_US); + "#else: excess tokens at end of directive"_en_US); } else if (ifStack_.empty()) { 
prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), "#else: not nested within #if, #ifdef, or #ifndef"_err_en_US); @@ -516,9 +514,9 @@ void Preprocessor::Directive(const TokenSequence &dir, Prescanner *prescanner) { dir.GetTokenProvenanceRange(dirOffset)); } } else if (dirName == "endif") { - if (j != tokens) { + if (dir.IsAnythingLeft(j)) { prescanner->Say(dir.GetIntervalProvenanceRange(j, tokens - j), - "#endif: excess tokens at end of directive"_err_en_US); + "#endif: excess tokens at end of directive"_en_US); } else if (ifStack_.empty()) { prescanner->Say(dir.GetTokenProvenanceRange(dirOffset), "#endif: no #if, #ifdef, or #ifndef"_err_en_US); diff --git a/flang/lib/Parser/token-sequence.cpp b/flang/lib/Parser/token-sequence.cpp index ce94f26235013..07c5b12e5f759 100644 --- a/flang/lib/Parser/token-sequence.cpp +++ b/flang/lib/Parser/token-sequence.cpp @@ -56,6 +56,31 @@ std::size_t TokenSequence::SkipBlanks(std::size_t at) const { return tokens; // even if at > tokens } +// C-style /*comments*/ are removed from preprocessing directive +// token sequences by the prescanner, but not C++ or Fortran +// free-form line-ending comments (//... and !...) because +// ignoring them is directive-specific. 
+bool TokenSequence::IsAnythingLeft(std::size_t at) const { + std::size_t tokens{start_.size()}; + for (; at < tokens; ++at) { + auto tok{TokenAt(at)}; + const char *end{tok.end()}; + for (const char *p{tok.begin()}; p < end; ++p) { + switch (*p) { + case '/': + return p + 1 >= end || p[1] != '/'; + case '!': + return false; + case ' ': + break; + default: + return true; + } + } + } + return false; +} + void TokenSequence::RemoveLastToken() { CHECK(!start_.empty()); CHECK(nextStart_ > start_.back()); diff --git a/flang/lib/Parser/token-sequence.h b/flang/lib/Parser/token-sequence.h index d73b3c20be6f4..d98c0b955c5e9 100644 --- a/flang/lib/Parser/token-sequence.h +++ b/flang/lib/Parser/token-sequence.h @@ -71,6 +71,10 @@ class TokenSequence { std::size_t SkipBlanks(std::size_t) const; + // True if anything remains in the sequence at & after the given offset + // except blanks and line-ending C++ and Fortran free-form comments. + bool IsAnythingLeft(std::size_t) const; + void PutNextTokenChar(char ch, Provenance provenance) { char_.emplace_back(ch); provenances_.Put({provenance, 1}); diff --git a/flang/test/Parser/pp-dir-comments.f90 b/flang/test/Parser/pp-dir-comments.f90 new file mode 100644 index 0000000000000..f5fe4ca5c71e8 --- /dev/null +++ b/flang/test/Parser/pp-dir-comments.f90 @@ -0,0 +1,19 @@ +! RUN: %f18 -funparse %s 2>&1 | FileCheck %s + +#define pmk +#ifdef pmk // comment +! CHECK: t1 +real t1 +#endif // comment +#undef pmk ! comment +#ifndef pmk ! comment +! CHECK: t2 +real t2 +#endif // comment +#if 0 /* C comment */ + 0 +! CHECK-NOT: misinterpreted +# error misinterpreted #if +#else // comment +! CHECK: END PROGRAM +end +#endif ! comment From cf5df40c4cf1a53a02ab1d56a488642e3dda8f6d Mon Sep 17 00:00:00 2001 From: Leonard Chan Date: Fri, 17 Jul 2020 15:29:50 -0700 Subject: [PATCH 685/771] Revert "[AddressSanitizer] Don't use weak linkage for __{start,stop}_asan_globals" This reverts commit d76e62fdb7a93d9a33f642b6b528f2562cc3c3f4. 
Reverting since this can lead to linker errors: ``` ld.lld: error: undefined hidden symbol: __start_asan_globals ``` when using --gc-sections. The linker can discard __start_asan_globals once there are no more `asan_globals` sections left, which can lead to this error if we have external linkages to them. --- .../Instrumentation/AddressSanitizer.cpp | 38 ++++++------------- .../AddressSanitizer/global_metadata.ll | 4 -- 2 files changed, 11 insertions(+), 31 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index ee09a4d9db7e1..7516a64c6a354 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -2103,23 +2103,10 @@ void ModuleAddressSanitizer::InstrumentGlobalsELF( SetComdatForGlobalMetadata(G, Metadata, UniqueModuleId); } - // This should never be called when there are no globals, by the logic that - // computes the UniqueModuleId string, which is "" when there are no globals. - // It's important that this path is only used when there are actually some - // globals, because that means that there will certainly be a live - // `asan_globals` input section at link time and thus `__start_asan_globals` - // and `__stop_asan_globals` symbols will definitely be defined at link time. - // This means there's no need for the references to them to be weak, which - // enables better code generation because ExternalWeakLinkage implies - // isInterposable() and thus requires GOT indirection for PIC. Since these - // are known-defined hidden/dso_local symbols, direct PIC accesses without - // dynamic relocation are always sufficient. - assert(!MetadataGlobals.empty()); - assert(!UniqueModuleId.empty()); - // Update llvm.compiler.used, adding the new metadata globals. This is // needed so that during LTO these variables stay alive. 
- appendToCompilerUsed(M, MetadataGlobals); + if (!MetadataGlobals.empty()) + appendToCompilerUsed(M, MetadataGlobals); // RegisteredFlag serves two purposes. First, we can pass it to dladdr() // to look up the loaded image that contains it. Second, we can store in it @@ -2132,18 +2119,15 @@ void ModuleAddressSanitizer::InstrumentGlobalsELF( ConstantInt::get(IntptrTy, 0), kAsanGlobalsRegisteredFlagName); RegisteredFlag->setVisibility(GlobalVariable::HiddenVisibility); - // Create start and stop symbols. These are known to be defined by - // the linker, see comment above. - auto MakeStartStopGV = [&](const char *Prefix) { - GlobalVariable *StartStop = - new GlobalVariable(M, IntptrTy, false, GlobalVariable::ExternalLinkage, - nullptr, Prefix + getGlobalMetadataSection()); - StartStop->setVisibility(GlobalVariable::HiddenVisibility); - assert(StartStop->isImplicitDSOLocal()); - return StartStop; - }; - GlobalVariable *StartELFMetadata = MakeStartStopGV("__start_"); - GlobalVariable *StopELFMetadata = MakeStartStopGV("__stop_"); + // Create start and stop symbols. + GlobalVariable *StartELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__start_" + getGlobalMetadataSection()); + StartELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); + GlobalVariable *StopELFMetadata = new GlobalVariable( + M, IntptrTy, false, GlobalVariable::ExternalWeakLinkage, nullptr, + "__stop_" + getGlobalMetadataSection()); + StopELFMetadata->setVisibility(GlobalVariable::HiddenVisibility); // Create a call to register the globals with the runtime. 
IRB.CreateCall(AsanRegisterElfGlobals, diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll index 4a6f426443769..ea9f2cf3f1a9d 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll @@ -28,10 +28,6 @@ target triple = "x86_64-unknown-linux-gnu" ; during LTO. ; CHECK: @llvm.compiler.used {{.*}} @__asan_global_global {{.*}} section "llvm.metadata" -; Check that start and stop symbols will be accessed as dso_local. -; CHECK: @__start_asan_globals = external hidden global i64 -; CHECK: @__stop_asan_globals = external hidden global i64 - ; Check that location descriptors and global names were passed into __asan_register_globals: ; CHECK: call void @__asan_register_elf_globals(i64 ptrtoint (i64* @___asan_globals_registered to i64), i64 ptrtoint (i64* @__start_asan_globals to i64), i64 ptrtoint (i64* @__stop_asan_globals to i64)) From 63c081e73d3d6d75ab6c6eefa37a69d73f46ed0f Mon Sep 17 00:00:00 2001 From: Aditya Nandakumar Date: Fri, 17 Jul 2020 16:03:20 -0700 Subject: [PATCH 686/771] [GISel: Add support for CSEing SrcOps which are immediates https://reviews.llvm.org/D84072 Add G_EXTRACT to CSEConfigFull and add unit test as well. 
--- llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp | 1 + llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp | 3 +++ llvm/unittests/CodeGen/GlobalISel/CSETest.cpp | 9 +++++++++ 3 files changed, 13 insertions(+) diff --git a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp index c4d8777615d27..071cc5b737358 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEInfo.cpp @@ -59,6 +59,7 @@ bool CSEConfigFull::shouldCSEOpc(unsigned Opc) { case TargetOpcode::G_UNMERGE_VALUES: case TargetOpcode::G_TRUNC: case TargetOpcode::G_PTR_ADD: + case TargetOpcode::G_EXTRACT: return true; } return false; diff --git a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp index 88173dc4d302c..6f8fd309e1b58 100644 --- a/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CSEMIRBuilder.cpp @@ -70,6 +70,9 @@ void CSEMIRBuilder::profileDstOp(const DstOp &Op, void CSEMIRBuilder::profileSrcOp(const SrcOp &Op, GISelInstProfileBuilder &B) const { switch (Op.getSrcOpKind()) { + case SrcOp::SrcType::Ty_Imm: + B.addNodeIDImmediate(static_cast(Op.getImm())); + break; case SrcOp::SrcType::Ty_Predicate: B.addNodeIDImmediate(static_cast(Op.getPredicate())); break; diff --git a/llvm/unittests/CodeGen/GlobalISel/CSETest.cpp b/llvm/unittests/CodeGen/GlobalISel/CSETest.cpp index 556f4f29b992e..69c21d6afb009 100644 --- a/llvm/unittests/CodeGen/GlobalISel/CSETest.cpp +++ b/llvm/unittests/CodeGen/GlobalISel/CSETest.cpp @@ -96,6 +96,15 @@ TEST_F(AArch64GISelMITest, TestCSE) { auto CSEFMul = CSEB.buildInstr(TargetOpcode::G_AND, {s32}, {Copies[0], Copies[1]}); EXPECT_EQ(&*CSEFMul, &*NonCSEFMul); + + auto ExtractMIB = CSEB.buildInstr(TargetOpcode::G_EXTRACT, {s16}, + {Copies[0], static_cast(0)}); + auto ExtractMIB1 = CSEB.buildInstr(TargetOpcode::G_EXTRACT, {s16}, + {Copies[0], static_cast(0)}); + auto ExtractMIB2 = CSEB.buildInstr(TargetOpcode::G_EXTRACT, {s16}, + {Copies[0], 
static_cast(1)}); + EXPECT_EQ(&*ExtractMIB, &*ExtractMIB1); + EXPECT_NE(&*ExtractMIB, &*ExtractMIB2); } TEST_F(AArch64GISelMITest, TestCSEConstantConfig) { From 53880b8cb9c61e81457d13c0adefe51ff41664fa Mon Sep 17 00:00:00 2001 From: Michele Scandale Date: Fri, 17 Jul 2020 16:43:05 -0700 Subject: [PATCH 687/771] [CMake] Make `intrinsics_gen` dependency unconditional. The `intrinsics_gen` target exists in the CMake exports since r309389 (see LLVMConfig.cmake.in), hence projects can depend on `intrinsics_gen` even it they are built separately from LLVM. Reviewed By: MaskRay, JDevlieghere Differential Revision: https://reviews.llvm.org/D83454 --- clang/CMakeLists.txt | 2 +- clang/lib/CodeGen/CMakeLists.txt | 11 +---------- clang/lib/Frontend/CMakeLists.txt | 7 +------ clang/tools/clang-fuzzer/handle-llvm/CMakeLists.txt | 8 +------- clang/tools/clang-import-test/CMakeLists.txt | 6 +----- clang/tools/clang-offload-bundler/CMakeLists.txt | 6 +----- clang/tools/clang-offload-wrapper/CMakeLists.txt | 6 +----- clang/tools/driver/CMakeLists.txt | 6 +----- lld/COFF/CMakeLists.txt | 6 +----- lld/Common/CMakeLists.txt | 6 +----- lld/ELF/CMakeLists.txt | 6 +----- lld/MinGW/CMakeLists.txt | 6 +----- lld/lib/Core/CMakeLists.txt | 6 +----- lld/wasm/CMakeLists.txt | 6 +----- lldb/CMakeLists.txt | 2 +- lldb/source/Expression/CMakeLists.txt | 6 +----- .../Plugins/ExpressionParser/Clang/CMakeLists.txt | 6 +----- .../RenderScript/RenderScriptRuntime/CMakeLists.txt | 7 +------ 18 files changed, 18 insertions(+), 91 deletions(-) diff --git a/clang/CMakeLists.txt b/clang/CMakeLists.txt index 7f8e0718c2ebc..948452661a32f 100644 --- a/clang/CMakeLists.txt +++ b/clang/CMakeLists.txt @@ -531,7 +531,7 @@ list(APPEND LLVM_COMMON_DEPENDS clang-tablegen-targets) # Force target to be built as soon as possible. Clang modules builds depend # header-wise on it as they ship all headers from the umbrella folders. Building # an entire module might include header, which depends on intrinsics_gen. 
-if(LLVM_ENABLE_MODULES AND NOT CLANG_BUILT_STANDALONE) +if(LLVM_ENABLE_MODULES) list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen) endif() diff --git a/clang/lib/CodeGen/CMakeLists.txt b/clang/lib/CodeGen/CMakeLists.txt index c4bedf34921cc..8afd7219fbe1e 100644 --- a/clang/lib/CodeGen/CMakeLists.txt +++ b/clang/lib/CodeGen/CMakeLists.txt @@ -26,15 +26,6 @@ set(LLVM_LINK_COMPONENTS TransformUtils ) -# In a standard Clang+LLVM build, we need to generate intrinsics before -# building codegen. In a standalone build, LLVM is already built and we don't -# need this dependency. Furthermore, LLVM doesn't export it so we can't have -# this dependency. -set(codegen_deps intrinsics_gen) -if (CLANG_BUILT_STANDALONE) - set(codegen_deps) -endif() - if (MSVC) set_source_files_properties(CodeGenModule.cpp PROPERTIES COMPILE_FLAGS /bigobj) endif() @@ -99,7 +90,7 @@ add_clang_library(clangCodeGen VarBypassDetector.cpp DEPENDS - ${codegen_deps} + intrinsics_gen LINK_LIBS clangAnalysis diff --git a/clang/lib/Frontend/CMakeLists.txt b/clang/lib/Frontend/CMakeLists.txt index 0e23b92e2dea9..af5446618b036 100644 --- a/clang/lib/Frontend/CMakeLists.txt +++ b/clang/lib/Frontend/CMakeLists.txt @@ -8,11 +8,6 @@ set(LLVM_LINK_COMPONENTS Support ) -set(optional_deps intrinsics_gen) -if (CLANG_BUILT_STANDALONE) - set(optional_deps) -endif() - add_clang_library(clangFrontend ASTConsumers.cpp ASTMerge.cpp @@ -49,7 +44,7 @@ add_clang_library(clangFrontend DEPENDS ClangDriverOptions - ${optional_deps} + intrinsics_gen LINK_LIBS clangAST diff --git a/clang/tools/clang-fuzzer/handle-llvm/CMakeLists.txt b/clang/tools/clang-fuzzer/handle-llvm/CMakeLists.txt index 47f9fdf68f409..9ceb1d3318283 100644 --- a/clang/tools/clang-fuzzer/handle-llvm/CMakeLists.txt +++ b/clang/tools/clang-fuzzer/handle-llvm/CMakeLists.txt @@ -16,15 +16,9 @@ set(LLVM_LINK_COMPONENTS native ) -# Depend on LLVM IR intrinsic generation. 
-set(handle_llvm_deps intrinsics_gen) -if (CLANG_BUILT_STANDALONE) - set(handle_llvm_deps) -endif() - add_clang_library(clangHandleLLVM handle_llvm.cpp DEPENDS - ${handle_llvm_deps} + intrinsics_gen ) diff --git a/clang/tools/clang-import-test/CMakeLists.txt b/clang/tools/clang-import-test/CMakeLists.txt index 4ccc2d752aac4..e459de8f635f5 100644 --- a/clang/tools/clang-import-test/CMakeLists.txt +++ b/clang/tools/clang-import-test/CMakeLists.txt @@ -3,14 +3,10 @@ set(LLVM_LINK_COMPONENTS Support ) -if(NOT CLANG_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_clang_executable(clang-import-test clang-import-test.cpp DEPENDS - ${tablegen_deps} + intrinsics_gen ) set(CLANG_IMPORT_TEST_LIB_DEPS diff --git a/clang/tools/clang-offload-bundler/CMakeLists.txt b/clang/tools/clang-offload-bundler/CMakeLists.txt index 4ef099493364a..2738bf02e729f 100644 --- a/clang/tools/clang-offload-bundler/CMakeLists.txt +++ b/clang/tools/clang-offload-bundler/CMakeLists.txt @@ -1,14 +1,10 @@ set(LLVM_LINK_COMPONENTS Object Support) -if(NOT CLANG_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_clang_tool(clang-offload-bundler ClangOffloadBundler.cpp DEPENDS - ${tablegen_deps} + intrinsics_gen ) set(CLANG_OFFLOAD_BUNDLER_LIB_DEPS diff --git a/clang/tools/clang-offload-wrapper/CMakeLists.txt b/clang/tools/clang-offload-wrapper/CMakeLists.txt index 6f8940f88eabd..8bcb46267a37c 100644 --- a/clang/tools/clang-offload-wrapper/CMakeLists.txt +++ b/clang/tools/clang-offload-wrapper/CMakeLists.txt @@ -1,14 +1,10 @@ set(LLVM_LINK_COMPONENTS BitWriter Core Support TransformUtils) -if(NOT CLANG_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_clang_tool(clang-offload-wrapper ClangOffloadWrapper.cpp DEPENDS - ${tablegen_deps} + intrinsics_gen ) set(CLANG_OFFLOAD_WRAPPER_LIB_DEPS diff --git a/clang/tools/driver/CMakeLists.txt b/clang/tools/driver/CMakeLists.txt index c53485ef19576..01efebdcb929c 100644 --- a/clang/tools/driver/CMakeLists.txt 
+++ b/clang/tools/driver/CMakeLists.txt @@ -24,10 +24,6 @@ if(CLANG_PLUGIN_SUPPORT) set(support_plugins SUPPORT_PLUGINS) endif() -if(NOT CLANG_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_clang_tool(clang driver.cpp cc1_main.cpp @@ -35,7 +31,7 @@ add_clang_tool(clang cc1gen_reproducer_main.cpp DEPENDS - ${tablegen_deps} + intrinsics_gen ${support_plugins} ) diff --git a/lld/COFF/CMakeLists.txt b/lld/COFF/CMakeLists.txt index 4592ace373efa..796f7a82a3de1 100644 --- a/lld/COFF/CMakeLists.txt +++ b/lld/COFF/CMakeLists.txt @@ -2,10 +2,6 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(COFFOptionsTableGen) -if(NOT LLD_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lld_library(lldCOFF Chunks.cpp DebugTypes.cpp @@ -48,5 +44,5 @@ add_lld_library(lldCOFF DEPENDS COFFOptionsTableGen - ${tablegen_deps} + intrinsics_gen ) diff --git a/lld/Common/CMakeLists.txt b/lld/Common/CMakeLists.txt index 53649032bd987..212328b784c5b 100644 --- a/lld/Common/CMakeLists.txt +++ b/lld/Common/CMakeLists.txt @@ -1,7 +1,3 @@ -if(NOT LLD_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - find_first_existing_vc_file("${LLVM_MAIN_SRC_DIR}" llvm_vc) find_first_existing_vc_file("${LLD_SOURCE_DIR}" lld_vc) @@ -57,5 +53,5 @@ add_lld_library(lldCommon ${LLVM_PTHREAD_LIB} DEPENDS - ${tablegen_deps} + intrinsics_gen ) diff --git a/lld/ELF/CMakeLists.txt b/lld/ELF/CMakeLists.txt index b89f4436288aa..f85d0fb9f55e3 100644 --- a/lld/ELF/CMakeLists.txt +++ b/lld/ELF/CMakeLists.txt @@ -2,10 +2,6 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(ELFOptionsTableGen) -if(NOT LLD_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lld_library(lldELF AArch64ErrataFix.cpp Arch/AArch64.cpp @@ -66,5 +62,5 @@ add_lld_library(lldELF DEPENDS ELFOptionsTableGen - ${tablegen_deps} + intrinsics_gen ) diff --git 
a/lld/MinGW/CMakeLists.txt b/lld/MinGW/CMakeLists.txt index bb0fe4a3887d7..1dc04d73eca65 100644 --- a/lld/MinGW/CMakeLists.txt +++ b/lld/MinGW/CMakeLists.txt @@ -2,10 +2,6 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(MinGWOptionsTableGen) -if(NOT LLD_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lld_library(lldMinGW Driver.cpp @@ -19,5 +15,5 @@ add_lld_library(lldMinGW DEPENDS MinGWOptionsTableGen - ${tablegen_deps} + intrinsics_gen ) diff --git a/lld/lib/Core/CMakeLists.txt b/lld/lib/Core/CMakeLists.txt index 2d4d9ded08862..d5e507536b720 100644 --- a/lld/lib/Core/CMakeLists.txt +++ b/lld/lib/Core/CMakeLists.txt @@ -1,7 +1,3 @@ -if(NOT LLD_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lld_library(lldCore DefinedAtom.cpp Error.cpp @@ -24,5 +20,5 @@ add_lld_library(lldCore ${LLVM_PTHREAD_LIB} DEPENDS - ${tablegen_deps} + intrinsics_gen ) diff --git a/lld/wasm/CMakeLists.txt b/lld/wasm/CMakeLists.txt index d2ba862c1e4a0..cd46f0a826ac9 100644 --- a/lld/wasm/CMakeLists.txt +++ b/lld/wasm/CMakeLists.txt @@ -2,10 +2,6 @@ set(LLVM_TARGET_DEFINITIONS Options.td) tablegen(LLVM Options.inc -gen-opt-parser-defs) add_public_tablegen_target(WasmOptionsTableGen) -if(NOT LLD_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lld_library(lldWasm Driver.cpp InputChunks.cpp @@ -37,5 +33,5 @@ add_lld_library(lldWasm DEPENDS WasmOptionsTableGen - ${tablegen_deps} + intrinsics_gen ) diff --git a/lldb/CMakeLists.txt b/lldb/CMakeLists.txt index bf748020ea407..b1c0597cf3b3f 100644 --- a/lldb/CMakeLists.txt +++ b/lldb/CMakeLists.txt @@ -64,7 +64,7 @@ endif () # some of these generated headers. This approach is copied from Clang's main # CMakeLists.txt, so it should kept in sync the code in Clang which was added # in llvm-svn 308844. 
-if(LLVM_ENABLE_MODULES AND NOT LLDB_BUILT_STANDALONE) +if(LLVM_ENABLE_MODULES) list(APPEND LLVM_COMMON_DEPENDS intrinsics_gen) endif() diff --git a/lldb/source/Expression/CMakeLists.txt b/lldb/source/Expression/CMakeLists.txt index 7e2f19ed5b09a..bf94361dd6c19 100644 --- a/lldb/source/Expression/CMakeLists.txt +++ b/lldb/source/Expression/CMakeLists.txt @@ -1,7 +1,3 @@ -if(NOT LLDB_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lldb_library(lldbExpression DiagnosticManager.cpp DWARFExpression.cpp @@ -18,7 +14,7 @@ add_lldb_library(lldbExpression UtilityFunction.cpp DEPENDS - ${tablegen_deps} + intrinsics_gen LINK_LIBS lldbCore diff --git a/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt b/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt index 69696b9aa76ad..04f6cdf9d9bd5 100644 --- a/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt +++ b/lldb/source/Plugins/ExpressionParser/Clang/CMakeLists.txt @@ -1,7 +1,3 @@ -if(NOT LLDB_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - add_lldb_library(lldbPluginExpressionParserClang ASTResultSynthesizer.cpp ASTStructExtractor.cpp @@ -29,7 +25,7 @@ add_lldb_library(lldbPluginExpressionParserClang NameSearchContext.cpp DEPENDS - ${tablegen_deps} + intrinsics_gen LINK_LIBS lldbCore diff --git a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/CMakeLists.txt b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/CMakeLists.txt index c122e09e8febe..9efb2c44d846e 100644 --- a/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/CMakeLists.txt +++ b/lldb/source/Plugins/LanguageRuntime/RenderScript/RenderScriptRuntime/CMakeLists.txt @@ -1,8 +1,3 @@ -if(NOT LLDB_BUILT_STANDALONE) - set(tablegen_deps intrinsics_gen) -endif() - - add_lldb_library(lldbPluginRenderScriptRuntime PLUGIN RenderScriptRuntime.cpp RenderScriptExpressionOpts.cpp @@ -10,7 +5,7 @@ add_lldb_library(lldbPluginRenderScriptRuntime PLUGIN 
RenderScriptScriptGroup.cpp DEPENDS - ${tablegen_deps} + intrinsics_gen LINK_LIBS lldbBreakpoint From 0dfa4a83fad43f95c90dd67cb23f63baaa907bd1 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 17 Jul 2020 16:47:41 -0700 Subject: [PATCH 688/771] Revert "[PGO][PGSO] Add profile guided size optimization to loop vectorization legality." This reverts commit 30c382a7c6607a7d898730f8d288768110cdf1d2. See https://crbug.com/1106813. --- .../Vectorize/LoopVectorizationLegality.h | 9 +-- .../Vectorize/LoopVectorizationLegality.cpp | 7 +- .../Transforms/Vectorize/LoopVectorize.cpp | 33 ++++------ llvm/test/Transforms/LoopVectorize/optsize.ll | 65 +------------------ 4 files changed, 15 insertions(+), 99 deletions(-) diff --git a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h index 7235aa5861120..c6c3450f77608 100644 --- a/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h +++ b/llvm/include/llvm/Transforms/Vectorize/LoopVectorizationLegality.h @@ -202,10 +202,9 @@ class LoopVectorizationLegality { Function *F, std::function *GetLAA, LoopInfo *LI, OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R, LoopVectorizeHints *H, DemandedBits *DB, - AssumptionCache *AC, BlockFrequencyInfo *BFI, ProfileSummaryInfo *PSI) + AssumptionCache *AC) : TheLoop(L), LI(LI), PSE(PSE), TTI(TTI), TLI(TLI), DT(DT), - GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC), - BFI(BFI), PSI(PSI) {} + GetLAA(GetLAA), ORE(ORE), Requirements(R), Hints(H), DB(DB), AC(AC) {} /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. @@ -479,10 +478,6 @@ class LoopVectorizationLegality { /// Assume instructions in predicated blocks must be dropped if the CFG gets /// flattened. SmallPtrSet ConditionalAssumes; - - /// BFI and PSI are used to check for profile guided size optimizations. 
- BlockFrequencyInfo *BFI; - ProfileSummaryInfo *PSI; }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp index 120b544808bed..23613775d896d 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp @@ -20,7 +20,6 @@ #include "llvm/Analysis/VectorUtils.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/PatternMatch.h" -#include "llvm/Transforms/Utils/SizeOpts.h" #include "llvm/Transforms/Vectorize/LoopVectorize.h" using namespace llvm; @@ -413,11 +412,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { const ValueToValueMap &Strides = getSymbolicStrides() ? *getSymbolicStrides() : ValueToValueMap(); - Function *F = TheLoop->getHeader()->getParent(); - bool OptForSize = F->hasOptSize() || - llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI, - PGSOQueryType::IRPass); - bool CanAddPredicate = !OptForSize; + bool CanAddPredicate = !TheLoop->getHeader()->getParent()->hasOptSize(); int Stride = getPtrStride(PSE, Ptr, TheLoop, Strides, CanAddPredicate, false); if (Stride == 1 || Stride == -1) return Stride; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index e9e0ccb8b2ee7..7f13a689cedbd 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -395,13 +395,11 @@ class InnerLoopVectorizer { const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, unsigned VecWidth, unsigned UnrollFactor, LoopVectorizationLegality *LVL, - LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI) + LoopVectorizationCostModel *CM) : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), AC(AC), ORE(ORE), VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), - 
VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM), - BFI(BFI), PSI(PSI) {} + VectorLoopValueMap(UnrollFactor, VecWidth), Legal(LVL), Cost(CM) {} virtual ~InnerLoopVectorizer() = default; /// Create a new empty loop. Unlink the old loop and connect the new one. @@ -781,10 +779,6 @@ class InnerLoopVectorizer { // Vector of original scalar PHIs whose corresponding widened PHIs need to be // fixed up at the end of vector code generation. SmallVector OrigPHIsToFix; - - /// BFI and PSI are used to check for profile guided size optimizations. - BlockFrequencyInfo *BFI; - ProfileSummaryInfo *PSI; }; class InnerLoopUnroller : public InnerLoopVectorizer { @@ -795,10 +789,9 @@ class InnerLoopUnroller : public InnerLoopVectorizer { const TargetTransformInfo *TTI, AssumptionCache *AC, OptimizationRemarkEmitter *ORE, unsigned UnrollFactor, LoopVectorizationLegality *LVL, - LoopVectorizationCostModel *CM, BlockFrequencyInfo *BFI, - ProfileSummaryInfo *PSI) + LoopVectorizationCostModel *CM) : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, AC, ORE, 1, - UnrollFactor, LVL, CM, BFI, PSI) {} + UnrollFactor, LVL, CM) {} private: Value *getBroadcastInstrs(Value *V) override; @@ -2761,9 +2754,7 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { if (C->isZero()) return; - assert(!(SCEVCheckBlock->getParent()->hasOptSize() || - llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, - PGSOQueryType::IRPass)) && + assert(!SCEVCheckBlock->getParent()->hasOptSize() && "Cannot SCEV check stride or overflow when optimizing for size"); SCEVCheckBlock->setName("vector.scevcheck"); @@ -2809,9 +2800,7 @@ void InnerLoopVectorizer::emitMemRuntimeChecks(Loop *L, BasicBlock *Bypass) { assert(MemRuntimeCheck && "no RT checks generated although RtPtrChecking " "claimed checks are required"); - if (MemCheckBlock->getParent()->hasOptSize() || - llvm::shouldOptimizeForSize(L->getHeader(), PSI, BFI, - PGSOQueryType::IRPass)) { + if 
(MemCheckBlock->getParent()->hasOptSize()) { assert(Cost->Hints->getForce() == LoopVectorizeHints::FK_Enabled && "Cannot emit memory checks when optimizing for size, unless forced " "to vectorize."); @@ -7740,7 +7729,7 @@ static bool processLoopInVPlanNativePath( LVP.setBestPlan(VF.Width, 1); InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, 1, LVL, - &CM, BFI, PSI); + &CM); LLVM_DEBUG(dbgs() << "Vectorizing outer loop in \"" << L->getHeader()->getParent()->getName() << "\"\n"); LVP.executePlan(LB, DT); @@ -7804,7 +7793,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { // Check if it is legal to vectorize the loop. LoopVectorizationRequirements Requirements(*ORE); LoopVectorizationLegality LVL(L, PSE, DT, TTI, TLI, AA, F, GetLAA, LI, ORE, - &Requirements, &Hints, DB, AC, BFI, PSI); + &Requirements, &Hints, DB, AC); if (!LVL.canVectorize(EnableVPlanNativePath)) { LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); Hints.emitRemarkWithHints(); @@ -8004,8 +7993,8 @@ bool LoopVectorizePass::processLoop(Loop *L) { assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop, then // interleave it. - InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, &CM, - BFI, PSI); + InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, AC, ORE, IC, &LVL, + &CM); LVP.executePlan(Unroller, DT); ORE->emit([&]() { @@ -8017,7 +8006,7 @@ bool LoopVectorizePass::processLoop(Loop *L) { } else { // If we decided that it is *legal* to vectorize the loop, then do it. 
InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, AC, ORE, VF.Width, IC, - &LVL, &CM, BFI, PSI); + &LVL, &CM); LVP.executePlan(LB, DT); ++LoopsVectorized; diff --git a/llvm/test/Transforms/LoopVectorize/optsize.ll b/llvm/test/Transforms/LoopVectorize/optsize.ll index b4233e6751cbc..0e88f362746fb 100644 --- a/llvm/test/Transforms/LoopVectorize/optsize.ll +++ b/llvm/test/Transforms/LoopVectorize/optsize.ll @@ -121,38 +121,6 @@ for.body29: br i1 %cmp26, label %for.body29, label %for.cond.cleanup28 } -define void @pr43371_pgso() !prof !14 { -; -; CHECK-LABEL: @pr43371_pgso -; CHECK-NOT: vector.scevcheck -; -; We do not want to generate SCEV predicates when optimising for size, because -; that will lead to extra code generation such as the SCEV overflow runtime -; checks. Not generating SCEV predicates can still result in vectorisation as -; the non-consecutive loads/stores can be scalarized: -; -; CHECK: vector.body: -; CHECK: store i16 0, i16* %{{.*}}, align 1 -; CHECK: store i16 0, i16* %{{.*}}, align 1 -; CHECK: br i1 {{.*}}, label %vector.body -; -entry: - br label %for.body29 - -for.cond.cleanup28: - unreachable - -for.body29: - %i24.0170 = phi i16 [ 0, %entry], [ %inc37, %for.body29] - %add33 = add i16 undef, %i24.0170 - %idxprom34 = zext i16 %add33 to i32 - %arrayidx35 = getelementptr [2592 x i16], [2592 x i16] * @cm_array, i32 0, i32 %idxprom34 - store i16 0, i16 * %arrayidx35, align 1 - %inc37 = add i16 %i24.0170, 1 - %cmp26 = icmp ult i16 %inc37, 756 - br i1 %cmp26, label %for.body29, label %for.cond.cleanup28 -} - ; PR45526: don't vectorize with fold-tail if first-order-recurrence is live-out. 
; define i32 @pr45526() optsize { @@ -186,37 +154,6 @@ exit: ret i32 %for } -define i32 @pr45526_pgso() !prof !14 { -; -; CHECK-LABEL: @pr45526_pgso -; CHECK-NEXT: entry: -; CHECK-NEXT: br label %loop -; CHECK-EMPTY: -; CHECK-NEXT: loop: -; CHECK-NEXT: %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ] -; CHECK-NEXT: %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ] -; CHECK-NEXT: %pivPlus1 = add nuw nsw i32 %piv, 1 -; CHECK-NEXT: %cond = icmp ult i32 %piv, 510 -; CHECK-NEXT: br i1 %cond, label %loop, label %exit -; CHECK-EMPTY: -; CHECK-NEXT: exit: -; CHECK-NEXT: %for.lcssa = phi i32 [ %for, %loop ] -; CHECK-NEXT: ret i32 %for.lcssa -; -entry: - br label %loop - -loop: - %piv = phi i32 [ 0, %entry ], [ %pivPlus1, %loop ] - %for = phi i32 [ 5, %entry ], [ %pivPlus1, %loop ] - %pivPlus1 = add nuw nsw i32 %piv, 1 - %cond = icmp ult i32 %piv, 510 - br i1 %cond, label %loop, label %exit - -exit: - ret i32 %for -} - ; PR46228: Vectorize w/o versioning for unit stride under optsize and enabled ; vectorization. @@ -253,7 +190,7 @@ define void @stride1(i16* noalias %B, i32 %BStride) optsize { ; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 2 ; CHECK-NEXT: [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], ; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1026 -; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !21 +; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !19 ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: From 8e2b4e50f27094b71840bdfc5b8ed7dbd4e85c2c Mon Sep 17 00:00:00 2001 From: peter klausler Date: Fri, 17 Jul 2020 16:36:59 -0700 Subject: [PATCH 689/771] [flang] Support = syntax in compiler directives Accept name=value as part of a !DIR$ compiler directive. These are currently ignored in semantics, but we should recognize more directive forms to facilitate testing. 
In due course, these placeholding directive parsers will be replaced. Reviewed By: sscalpone Differential Revision: https://reviews.llvm.org/D84077 --- flang/include/flang/Parser/dump-parse-tree.h | 1 + flang/include/flang/Parser/parse-tree.h | 6 +++++- flang/lib/Parser/Fortran-parsers.cpp | 4 +++- flang/lib/Parser/unparse.cpp | 8 +++++++- 4 files changed, 16 insertions(+), 3 deletions(-) diff --git a/flang/include/flang/Parser/dump-parse-tree.h b/flang/include/flang/Parser/dump-parse-tree.h index 36e593eb3b781..02da3f53b44e0 100644 --- a/flang/include/flang/Parser/dump-parse-tree.h +++ b/flang/include/flang/Parser/dump-parse-tree.h @@ -222,6 +222,7 @@ class ParseTreeDumper { NODE(CommonStmt, Block) NODE(parser, CompilerDirective) NODE(CompilerDirective, IgnoreTKR) + NODE(CompilerDirective, NameValue) NODE(parser, ComplexLiteralConstant) NODE(parser, ComplexPart) NODE(parser, ComponentArraySpec) diff --git a/flang/include/flang/Parser/parse-tree.h b/flang/include/flang/Parser/parse-tree.h index d9ecebfc3fdda..4b34d2cd674cc 100644 --- a/flang/include/flang/Parser/parse-tree.h +++ b/flang/include/flang/Parser/parse-tree.h @@ -3211,8 +3211,12 @@ struct CompilerDirective { TUPLE_CLASS_BOILERPLATE(IgnoreTKR); std::tuple, Name> t; }; + struct NameValue { + TUPLE_CLASS_BOILERPLATE(NameValue); + std::tuple> t; + }; CharBlock source; - std::variant, std::list> u; + std::variant, std::list> u; }; // Legacy extensions diff --git a/flang/lib/Parser/Fortran-parsers.cpp b/flang/lib/Parser/Fortran-parsers.cpp index 3192781d4bcc9..f46186323ada1 100644 --- a/flang/lib/Parser/Fortran-parsers.cpp +++ b/flang/lib/Parser/Fortran-parsers.cpp @@ -1173,7 +1173,9 @@ constexpr auto ignore_tkr{ defaulted(parenthesized(some("tkr"_ch))), name))}; TYPE_PARSER( beginDirective >> sourced(construct(ignore_tkr) || - construct("DIR$" >> many(name))) / + construct("DIR$" >> + many(construct( + name, maybe("=" >> digitString64))))) / endDirective) TYPE_PARSER(extension(construct( diff --git 
a/flang/lib/Parser/unparse.cpp b/flang/lib/Parser/unparse.cpp index 99792cbf706fa..3b95636fc3e59 100644 --- a/flang/lib/Parser/unparse.cpp +++ b/flang/lib/Parser/unparse.cpp @@ -1761,7 +1761,9 @@ class UnparseVisitor { Word("!DIR$ IGNORE_TKR"); // emitted even if tkr list is empty Walk(" ", tkr, ", "); }, - [&](const std::list &names) { Walk("!DIR$ ", names, " "); }, + [&](const std::list &names) { + Walk("!DIR$ ", names, " "); + }, }, x.u); Put('\n'); @@ -1777,6 +1779,10 @@ class UnparseVisitor { } Walk(std::get(x.t)); } + void Unparse(const CompilerDirective::NameValue &x) { + Walk(std::get(x.t)); + Walk("=", std::get>(x.t)); + } // OpenACC Directives & Clauses void Unparse(const AccAtomicCapture &x) { From 8305a92a4752aff8118b501407cfd158c653e67c Mon Sep 17 00:00:00 2001 From: peter klausler Date: Fri, 17 Jul 2020 16:40:49 -0700 Subject: [PATCH 690/771] [flang] Treat tabs like spaces in formatted input. Reviewed By: sscalpone Differential Revision: https://reviews.llvm.org/D84078 --- flang/runtime/edit-input.cpp | 9 +++++---- flang/runtime/io-stmt.cpp | 10 ++++++---- 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/flang/runtime/edit-input.cpp b/flang/runtime/edit-input.cpp index 27e8122d9ae6b..dd708c7bfb913 100644 --- a/flang/runtime/edit-input.cpp +++ b/flang/runtime/edit-input.cpp @@ -34,7 +34,7 @@ static bool EditBOZInput(IoStatementState &io, const DataEdit &edit, void *n, common::UnsignedInt128 value{0}; for (; next; next = io.NextInField(remaining)) { char32_t ch{*next}; - if (ch == ' ') { + if (ch == ' ' || ch == '\t') { continue; } int digit{0}; @@ -101,7 +101,7 @@ bool EditIntegerInput( common::UnsignedInt128 value; for (; next; next = io.NextInField(remaining)) { char32_t ch{*next}; - if (ch == ' ') { + if (ch == ' ' || ch == '\t') { if (edit.modes.editingFlags & blankZero) { ch = '0'; // BZ mode - treat blank as if it were zero } else { @@ -170,7 +170,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, } 
else if (*next == decimal || (*next >= '0' && *next <= '9')) { for (; next; next = io.NextInField(remaining)) { char32_t ch{*next}; - if (ch == ' ') { + if (ch == ' ' || ch == '\t') { if (edit.modes.editingFlags & blankZero) { ch = '0'; // BZ mode - treat blank as if it were zero } else { @@ -229,7 +229,7 @@ static int ScanRealInput(char *buffer, int bufferSize, IoStatementState &io, return 0; } if (remaining) { - while (next && *next == ' ') { + while (next && (*next == ' ' || *next == '\t')) { next = io.NextInField(remaining); } if (next) { @@ -386,6 +386,7 @@ static bool EditListDirectedDefaultCharacterInput( next = io.NextInField(remaining)) { switch (*next) { case ' ': + case '\t': case ',': case ';': case '/': diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 70fb3f9350bc8..8efda2d09a773 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -353,7 +353,7 @@ std::optional IoStatementState::SkipSpaces( std::optional &remaining) { while (!remaining || *remaining > 0) { if (auto ch{GetCurrentChar()}) { - if (*ch != ' ') { + if (*ch != ' ' && *ch != '\t') { return ch; } HandleRelativePosition(1); @@ -373,6 +373,7 @@ std::optional IoStatementState::NextInField( if (auto next{GetCurrentChar()}) { switch (*next) { case ' ': + case '\t': case ',': case ';': case '/': @@ -415,7 +416,7 @@ std::optional IoStatementState::NextInField( std::optional IoStatementState::GetNextNonBlank() { auto ch{GetCurrentChar()}; - while (ch.value_or(' ') == ' ') { + while (!ch || *ch == ' ' || *ch == '\t') { if (ch) { HandleRelativePosition(1); } else if (!AdvanceRecord()) { @@ -485,7 +486,8 @@ ListDirectedStatementState::GetNextDataEdit( if (!imaginaryPart_) { edit.repeat = std::min(remaining_, maxRepeat); auto ch{io.GetNextNonBlank()}; - if (!ch || *ch == ' ' || *ch == comma) { // "r*" repeated null + if (!ch || *ch == ' ' || *ch == '\t' || *ch == comma) { + // "r*" repeated null edit.descriptor = DataEdit::ListDirectedNullValue; } } @@ -554,7 
+556,7 @@ ListDirectedStatementState::GetNextDataEdit( edit.descriptor = DataEdit::ListDirectedNullValue; return edit; } - if (!ch || *ch == ' ' || *ch == comma) { // "r*" null + if (!ch || *ch == ' ' || *ch == '\t' || *ch == comma) { // "r*" null edit.descriptor = DataEdit::ListDirectedNullValue; } edit.repeat = std::min(r, maxRepeat); From b0a971d25cdc9fdb7ca1a21db1d0fd409f58f85a Mon Sep 17 00:00:00 2001 From: peter klausler Date: Fri, 17 Jul 2020 17:02:47 -0700 Subject: [PATCH 691/771] [flang] Prevent bogus runtime I/O error message The runtime was requiring that STATUS='OLD' be explicitly specified on an OPEN statement for a connected unit. There error should issue only if a STATUS= other than 'OLD' is specified; an OPEN with no STATUS= specifier is okay. Reviewed By: sscalpone Differential Revision: https://reviews.llvm.org/D84079 --- flang/runtime/io-stmt.cpp | 9 +++++---- flang/runtime/io-stmt.h | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 8efda2d09a773..9e89e0c28816d 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -162,11 +162,12 @@ void OpenStatementState::set_path( } int OpenStatementState::EndIoStatement() { - if (wasExtant_ && status_ != OpenStatus::Old) { - SignalError("OPEN statement for connected unit must have STATUS='OLD'"); + if (wasExtant_ && status_ && *status_ != OpenStatus::Old) { + SignalError("OPEN statement for connected unit may not have STATUS= other " + "than 'OLD'"); } - unit().OpenUnit( - status_, action_, position_, std::move(path_), pathLength_, *this); + unit().OpenUnit(status_.value_or(OpenStatus::Unknown), action_, position_, + std::move(path_), pathLength_, *this); return ExternalIoStatementBase::EndIoStatement(); } diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index 6f5ca2c481128..da58769ef1143 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -302,7 +302,7 @@ class OpenStatementState 
: public ExternalIoStatementBase { private: bool wasExtant_; - OpenStatus status_{OpenStatus::Unknown}; + std::optional status_; Position position_{Position::AsIs}; std::optional action_; OwningPtr path_; From 1bbed69059d5ac35c0775e8ed598a34628d8ba6d Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 17 Jul 2020 17:29:20 -0700 Subject: [PATCH 692/771] [sanitizer] Another attempt to fix protoent test Now we are going to pick name and index based on output of getprotoent_r. --- .../sanitizer_common/TestCases/Linux/protoent.cpp | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/protoent.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/protoent.cpp index a10fd114022c9..003790067d1b1 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/protoent.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/protoent.cpp @@ -7,6 +7,10 @@ #include #include #include +#include + +std::string any_name; +int total_count; void print_protoent(protoent *curr_entry) { fprintf(stderr, "%s (%d)\n", curr_entry->p_name, curr_entry->p_proto); @@ -23,6 +27,8 @@ void print_all_protoent() { protoent *curr_entry; while (getprotoent_r(&entry, buf, sizeof(buf), &curr_entry) != ENOENT && curr_entry) { + ++total_count; + any_name = curr_entry->p_name; print_protoent(curr_entry); } } @@ -51,10 +57,13 @@ int main() { fprintf(stderr, "All protoent\n"); print_all_protoent(); + if (!total_count) + return 0; + fprintf(stderr, "Protoent by name\n"); - print_protoent_by_name("ipv6"); + print_protoent_by_name(any_name.c_str()); fprintf(stderr, "Protoent by num\n"); - print_protoent_by_num(17); + print_protoent_by_num(total_count / 2); return 0; } From 31eb83496fb4e41e322e19f162aeae885ed91301 Mon Sep 17 00:00:00 2001 From: Logan Smith Date: Thu, 16 Jul 2020 20:36:46 -0700 Subject: [PATCH 693/771] [llvm][NFC] Add missing 'override's in unittests/ --- llvm/unittests/ADT/TwineTest.cpp | 2 +- 
.../DebugInfo/CodeView/RandomAccessVisitorTest.cpp | 4 ++-- llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp | 10 +++++----- llvm/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp | 2 +- llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp | 2 +- .../Orc/LegacyCompileOnDemandLayerTest.cpp | 2 +- .../Orc/RTDyldObjectLinkingLayerTest.cpp | 4 ++-- llvm/unittests/IR/LegacyPassManagerTest.cpp | 4 ++-- llvm/unittests/IR/ModuleTest.cpp | 2 +- llvm/unittests/ProfileData/InstrProfTest.cpp | 6 +++--- llvm/unittests/Support/CrashRecoveryTest.cpp | 2 +- llvm/unittests/Support/ELFAttributeParserTest.cpp | 2 +- llvm/unittests/Support/FileCheckTest.cpp | 2 +- llvm/unittests/Transforms/Utils/LocalTest.cpp | 2 +- llvm/unittests/tools/llvm-cfi-verify/FileAnalysis.cpp | 2 +- llvm/unittests/tools/llvm-cfi-verify/GraphBuilder.cpp | 2 +- .../tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp | 2 +- 17 files changed, 26 insertions(+), 26 deletions(-) diff --git a/llvm/unittests/ADT/TwineTest.cpp b/llvm/unittests/ADT/TwineTest.cpp index a717036c13645..52cec68210b7d 100644 --- a/llvm/unittests/ADT/TwineTest.cpp +++ b/llvm/unittests/ADT/TwineTest.cpp @@ -105,7 +105,7 @@ TEST(TwineTest, LazyEvaluation) { explicit formatter(int &Count) : FormatAdapter(0), Count(Count) {} int &Count; - void format(raw_ostream &OS, StringRef Style) { ++Count; } + void format(raw_ostream &OS, StringRef Style) override { ++Count; } }; int Count = 0; diff --git a/llvm/unittests/DebugInfo/CodeView/RandomAccessVisitorTest.cpp b/llvm/unittests/DebugInfo/CodeView/RandomAccessVisitorTest.cpp index 2e95465fb5ac7..cbf8d47c6471b 100644 --- a/llvm/unittests/DebugInfo/CodeView/RandomAccessVisitorTest.cpp +++ b/llvm/unittests/DebugInfo/CodeView/RandomAccessVisitorTest.cpp @@ -64,11 +64,11 @@ namespace { class MockCallbacks : public TypeVisitorCallbacks { public: - virtual Error visitTypeBegin(CVType &CVR, TypeIndex Index) { + Error visitTypeBegin(CVType &CVR, TypeIndex Index) override { Indices.push_back(Index); return 
Error::success(); } - virtual Error visitKnownRecord(CVType &CVR, ArrayRecord &AR) { + Error visitKnownRecord(CVType &CVR, ArrayRecord &AR) override { VisitedRecords.push_back(AR); RawRecords.push_back(CVR); return Error::success(); diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp index f4ea34d02094a..26fb8e824923f 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugLineTest.cpp @@ -126,7 +126,7 @@ struct DebugLineBasicFixture : public Test, public CommonFixture {}; struct DebugLineParameterisedFixture : public TestWithParam>, public CommonFixture { - void SetUp() { std::tie(Version, Format) = GetParam(); } + void SetUp() override { std::tie(Version, Format) = GetParam(); } uint16_t Version; DwarfFormat Format; @@ -328,7 +328,7 @@ TEST_F(DebugLineBasicFixture, ErrorForReservedLength) { struct DebugLineUnsupportedVersionFixture : public TestWithParam, public CommonFixture { - void SetUp() { Version = GetParam(); } + void SetUp() override { Version = GetParam(); } uint16_t Version; }; @@ -1384,7 +1384,7 @@ struct TruncatedPrologueFixture : public TestWithParam< std::tuple>, public CommonFixture { - void SetUp() { + void SetUp() override { std::tie(Length, ExpectedOffset, Version, Format, ExpectedErr) = GetParam(); } @@ -1554,7 +1554,7 @@ struct TruncatedStandardOpcodeFixture : public TestWithParam< std::tuple>, public TruncatedOpcodeFixtureBase { - void SetUp() { + void SetUp() override { std::tie(BodyLength, Opcode, Operands, ExpectedOutput, ExpectedErr) = GetParam(); } @@ -1564,7 +1564,7 @@ struct TruncatedExtendedOpcodeFixture : public TestWithParam>, public TruncatedOpcodeFixtureBase { - void SetUp() { + void SetUp() override { std::tie(BodyLength, OpcodeLength, Opcode, Operands, ExpectedOutput, ExpectedErr) = GetParam(); } diff --git a/llvm/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp 
b/llvm/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp index 4df77baf9471c..0a0b8084f65f0 100644 --- a/llvm/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp +++ b/llvm/unittests/DebugInfo/DWARF/DWARFFormValueTest.cpp @@ -125,7 +125,7 @@ using ParamType = std::tuple, uint64_t, bool>; struct FormSkipValueFixtureBase : public testing::TestWithParam { - void SetUp() { + void SetUp() override { std::tie(Fm, Version, AddrSize, Dwarf, InitialData, ExpectedSkipped, ExpectedResult) = GetParam(); } diff --git a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp index b4e8a8302d3bd..6f6e1d43af93d 100644 --- a/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/CoreAPIsTest.cpp @@ -1044,7 +1044,7 @@ TEST_F(CoreAPIsStandardTest, GeneratorTest) { TestGenerator(SymbolMap Symbols) : Symbols(std::move(Symbols)) {} Error tryToGenerate(LookupKind K, JITDylib &JD, JITDylibLookupFlags JDLookupFlags, - const SymbolLookupSet &Names) { + const SymbolLookupSet &Names) override { SymbolMap NewDefs; for (const auto &KV : Names) { diff --git a/llvm/unittests/ExecutionEngine/Orc/LegacyCompileOnDemandLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/LegacyCompileOnDemandLayerTest.cpp index 59cd11c5e5a68..a13d8bdeeeb38 100644 --- a/llvm/unittests/ExecutionEngine/Orc/LegacyCompileOnDemandLayerTest.cpp +++ b/llvm/unittests/ExecutionEngine/Orc/LegacyCompileOnDemandLayerTest.cpp @@ -17,7 +17,7 @@ namespace { class DummyTrampolinePool : public orc::TrampolinePool { public: - Expected getTrampoline() { + Expected getTrampoline() override { llvm_unreachable("Unimplemented"); } }; diff --git a/llvm/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp b/llvm/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp index 4a192c1f28ac3..c9c15aa3dffd7 100644 --- a/llvm/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp +++ 
b/llvm/unittests/ExecutionEngine/Orc/RTDyldObjectLinkingLayerTest.cpp @@ -117,7 +117,7 @@ TEST(RTDyldObjectLinkingLayerTest, TestOverrideObjectFlags) { public: FunkySimpleCompiler(TargetMachine &TM) : SimpleCompiler(TM) {} - Expected operator()(Module &M) { + Expected operator()(Module &M) override { auto *Foo = M.getFunction("foo"); assert(Foo && "Expected function Foo not found"); Foo->setVisibility(GlobalValue::HiddenVisibility); @@ -187,7 +187,7 @@ TEST(RTDyldObjectLinkingLayerTest, TestAutoClaimResponsibilityForSymbols) { public: FunkySimpleCompiler(TargetMachine &TM) : SimpleCompiler(TM) {} - Expected operator()(Module &M) { + Expected operator()(Module &M) override { Function *BarImpl = Function::Create( FunctionType::get(Type::getVoidTy(M.getContext()), {}, false), GlobalValue::ExternalLinkage, "bar", &M); diff --git a/llvm/unittests/IR/LegacyPassManagerTest.cpp b/llvm/unittests/IR/LegacyPassManagerTest.cpp index 8dda94b1b0326..72ac4be229974 100644 --- a/llvm/unittests/IR/LegacyPassManagerTest.cpp +++ b/llvm/unittests/IR/LegacyPassManagerTest.cpp @@ -355,12 +355,12 @@ namespace llvm { struct CustomOptPassGate : public OptPassGate { bool Skip; CustomOptPassGate(bool Skip) : Skip(Skip) { } - bool shouldRunPass(const Pass *P, StringRef IRDescription) { + bool shouldRunPass(const Pass *P, StringRef IRDescription) override { if (P->getPassKind() == PT_Module) return !Skip; return OptPassGate::shouldRunPass(P, IRDescription); } - bool isEnabled() const { return true; } + bool isEnabled() const override { return true; } }; // Optional module pass. 
diff --git a/llvm/unittests/IR/ModuleTest.cpp b/llvm/unittests/IR/ModuleTest.cpp index 9fb9b8b057a5e..4e2e394a92504 100644 --- a/llvm/unittests/IR/ModuleTest.cpp +++ b/llvm/unittests/IR/ModuleTest.cpp @@ -56,7 +56,7 @@ TEST(ModuleTest, randomNumberGenerator) { static char ID; struct DummyPass : ModulePass { DummyPass() : ModulePass(ID) {} - bool runOnModule(Module &) { return true; } + bool runOnModule(Module &) override { return true; } } DP; Module M("R", Context); diff --git a/llvm/unittests/ProfileData/InstrProfTest.cpp b/llvm/unittests/ProfileData/InstrProfTest.cpp index 3e862aafcf05c..d94906991b0c6 100644 --- a/llvm/unittests/ProfileData/InstrProfTest.cpp +++ b/llvm/unittests/ProfileData/InstrProfTest.cpp @@ -39,7 +39,7 @@ struct InstrProfTest : ::testing::Test { InstrProfWriter Writer; std::unique_ptr Reader; - void SetUp() { Writer.setOutputSparse(false); } + void SetUp() override { Writer.setOutputSparse(false); } void readProfile(std::unique_ptr Profile, std::unique_ptr Remapping = nullptr) { @@ -51,12 +51,12 @@ struct InstrProfTest : ::testing::Test { }; struct SparseInstrProfTest : public InstrProfTest { - void SetUp() { Writer.setOutputSparse(true); } + void SetUp() override { Writer.setOutputSparse(true); } }; struct MaybeSparseInstrProfTest : public InstrProfTest, public ::testing::WithParamInterface { - void SetUp() { Writer.setOutputSparse(GetParam()); } + void SetUp() override { Writer.setOutputSparse(GetParam()); } }; TEST_P(MaybeSparseInstrProfTest, write_and_read_empty_profile) { diff --git a/llvm/unittests/Support/CrashRecoveryTest.cpp b/llvm/unittests/Support/CrashRecoveryTest.cpp index 798ed20145d87..6a62c7cab9d83 100644 --- a/llvm/unittests/Support/CrashRecoveryTest.cpp +++ b/llvm/unittests/Support/CrashRecoveryTest.cpp @@ -39,7 +39,7 @@ TEST(CrashRecoveryTest, Basic) { struct IncrementGlobalCleanup : CrashRecoveryContextCleanup { IncrementGlobalCleanup(CrashRecoveryContext *CRC) : CrashRecoveryContextCleanup(CRC) {} - virtual void 
recoverResources() { ++GlobalInt; } + void recoverResources() override { ++GlobalInt; } }; static void noop() {} diff --git a/llvm/unittests/Support/ELFAttributeParserTest.cpp b/llvm/unittests/Support/ELFAttributeParserTest.cpp index 8234d4ee176f2..5eaed39a71057 100644 --- a/llvm/unittests/Support/ELFAttributeParserTest.cpp +++ b/llvm/unittests/Support/ELFAttributeParserTest.cpp @@ -17,7 +17,7 @@ static const TagNameMap emptyTagNameMap; // This class is used to test the common part of the ELF attribute section. class AttributeHeaderParser : public ELFAttributeParser { - Error handler(uint64_t tag, bool &handled) { + Error handler(uint64_t tag, bool &handled) override { // Treat all attributes as handled. handled = true; return Error::success(); diff --git a/llvm/unittests/Support/FileCheckTest.cpp b/llvm/unittests/Support/FileCheckTest.cpp index 92975dcd76b74..8cf823425fc37 100644 --- a/llvm/unittests/Support/FileCheckTest.cpp +++ b/llvm/unittests/Support/FileCheckTest.cpp @@ -104,7 +104,7 @@ struct ExpressionFormatParameterisedFixture SourceMgr SM; - void SetUp() { + void SetUp() override { ExpressionFormat::Kind Kind = GetParam(); AllowHex = Kind == ExpressionFormat::Kind::HexLower || Kind == ExpressionFormat::Kind::HexUpper; diff --git a/llvm/unittests/Transforms/Utils/LocalTest.cpp b/llvm/unittests/Transforms/Utils/LocalTest.cpp index 3862a418603b2..3dec930a27271 100644 --- a/llvm/unittests/Transforms/Utils/LocalTest.cpp +++ b/llvm/unittests/Transforms/Utils/LocalTest.cpp @@ -489,7 +489,7 @@ struct SalvageDebugInfoTest : ::testing::Test { std::unique_ptr M; Function *F = nullptr; - void SetUp() { + void SetUp() override { M = parseIR(C, R"( define void @f() !dbg !8 { diff --git a/llvm/unittests/tools/llvm-cfi-verify/FileAnalysis.cpp b/llvm/unittests/tools/llvm-cfi-verify/FileAnalysis.cpp index ef53a70aeea05..6bb083b4d2bf7 100644 --- a/llvm/unittests/tools/llvm-cfi-verify/FileAnalysis.cpp +++ b/llvm/unittests/tools/llvm-cfi-verify/FileAnalysis.cpp @@ -65,7 +65,7 
@@ class BasicFileAnalysisTest : public ::testing::Test { BasicFileAnalysisTest(StringRef Trip) : SuccessfullyInitialised(false), Analysis(Trip) {} protected: - virtual void SetUp() { + void SetUp() override { IgnoreDWARFFlag = true; SuccessfullyInitialised = true; if (auto Err = Analysis.initialiseDisassemblyMembers()) { diff --git a/llvm/unittests/tools/llvm-cfi-verify/GraphBuilder.cpp b/llvm/unittests/tools/llvm-cfi-verify/GraphBuilder.cpp index a57958d60e68b..954c113b915e4 100644 --- a/llvm/unittests/tools/llvm-cfi-verify/GraphBuilder.cpp +++ b/llvm/unittests/tools/llvm-cfi-verify/GraphBuilder.cpp @@ -124,7 +124,7 @@ class ELFx86TestFileAnalysis : public FileAnalysis { class BasicGraphBuilderTest : public ::testing::Test { protected: - virtual void SetUp() { + void SetUp() override { IgnoreDWARFFlag = true; SuccessfullyInitialised = true; if (auto Err = Analysis.initialiseDisassemblyMembers()) { diff --git a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp index 298a54abffc88..dcc9617065b03 100644 --- a/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp +++ b/llvm/unittests/tools/llvm-exegesis/X86/SnippetRepetitorTest.cpp @@ -29,7 +29,7 @@ using testing::UnorderedElementsAre; class X86SnippetRepetitorTest : public X86TestBase { protected: - void SetUp() { + void SetUp() override { TM = State.createTargetMachine(); Context = std::make_unique(); Mod = std::make_unique("X86SnippetRepetitorTest", *Context); From 3ee7fe4cfda1169786f4d2595cb2d51e31bead08 Mon Sep 17 00:00:00 2001 From: Logan Smith Date: Thu, 16 Jul 2020 20:38:41 -0700 Subject: [PATCH 694/771] [llvm][NFC] Add missing 'override's --- llvm/include/llvm/ExecutionEngine/JITSymbol.h | 2 +- llvm/include/llvm/ExecutionEngine/Orc/Speculation.h | 2 +- llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp | 4 ++-- llvm/tools/llvm-jitlink/llvm-jitlink.cpp | 2 +- llvm/tools/llvm-pdbutil/FormatUtil.h | 2 +- 5 files 
changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/include/llvm/ExecutionEngine/JITSymbol.h b/llvm/include/llvm/ExecutionEngine/JITSymbol.h index 6f0030a18f475..9bbdd21f77de1 100644 --- a/llvm/include/llvm/ExecutionEngine/JITSymbol.h +++ b/llvm/include/llvm/ExecutionEngine/JITSymbol.h @@ -429,7 +429,7 @@ class LegacyJITSymbolResolver : public JITSymbolResolver { virtual JITSymbol findSymbol(const std::string &Name) = 0; private: - virtual void anchor(); + void anchor() override; }; } // end namespace llvm diff --git a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h index d8213d3b35e8f..a6537dd3093bd 100644 --- a/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h +++ b/llvm/include/llvm/ExecutionEngine/Orc/Speculation.h @@ -185,7 +185,7 @@ class IRSpeculationLayer : public IRLayer { : IRLayer(ES, BaseLayer.getManglingOptions()), NextLayer(BaseLayer), S(Spec), Mangle(Mangle), QueryAnalysis(Interpreter) {} - void emit(MaterializationResponsibility R, ThreadSafeModule TSM); + void emit(MaterializationResponsibility R, ThreadSafeModule TSM) override; private: TargetAndLikelies diff --git a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp index 21925726072e3..7888c2fcbdbd9 100644 --- a/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp +++ b/llvm/lib/ExecutionEngine/Orc/RTDyldObjectLinkingLayer.cpp @@ -18,7 +18,7 @@ class JITDylibSearchOrderResolver : public JITSymbolResolver { public: JITDylibSearchOrderResolver(MaterializationResponsibility &MR) : MR(MR) {} - void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) { + void lookup(const LookupSet &Symbols, OnResolvedFunction OnResolved) override { auto &ES = MR.getTargetJITDylib().getExecutionSession(); SymbolLookupSet InternedSymbols; @@ -55,7 +55,7 @@ class JITDylibSearchOrderResolver : public JITSymbolResolver { RegisterDependencies); } - Expected 
getResponsibilitySet(const LookupSet &Symbols) { + Expected getResponsibilitySet(const LookupSet &Symbols) override { LookupSet Result; for (auto &KV : MR.getSymbols()) { diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp index b44a56e0ac925..04132f0769662 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink.cpp @@ -435,7 +435,7 @@ Session::Session(Triple TT, Error &Err) public: JITLinkSessionPlugin(Session &S) : S(S) {} void modifyPassConfig(MaterializationResponsibility &MR, const Triple &TT, - PassConfiguration &PassConfig) { + PassConfiguration &PassConfig) override { S.modifyPassConfig(TT, PassConfig); } diff --git a/llvm/tools/llvm-pdbutil/FormatUtil.h b/llvm/tools/llvm-pdbutil/FormatUtil.h index 1a006844e011a..133a0eb40e121 100644 --- a/llvm/tools/llvm-pdbutil/FormatUtil.h +++ b/llvm/tools/llvm-pdbutil/FormatUtil.h @@ -123,7 +123,7 @@ struct EndianAdapter final explicit EndianAdapter(EndianType &&Item) : FormatAdapter(std::move(Item)) {} - void format(llvm::raw_ostream &Stream, StringRef Style) { + void format(llvm::raw_ostream &Stream, StringRef Style) override { format_provider::format(static_cast(this->Item), Stream, Style); } }; From 105056045d9ab0b1a49781a18129ada48893452e Mon Sep 17 00:00:00 2001 From: Logan Smith Date: Thu, 16 Jul 2020 20:39:45 -0700 Subject: [PATCH 695/771] [clang][NFC] Add a missing 'override' --- clang/unittests/CrossTU/CrossTranslationUnitTest.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/unittests/CrossTU/CrossTranslationUnitTest.cpp b/clang/unittests/CrossTU/CrossTranslationUnitTest.cpp index 5495f27f5b32a..4e6fbeee86a3f 100644 --- a/clang/unittests/CrossTU/CrossTranslationUnitTest.cpp +++ b/clang/unittests/CrossTU/CrossTranslationUnitTest.cpp @@ -29,7 +29,7 @@ class CTUASTConsumer : public clang::ASTConsumer { explicit CTUASTConsumer(clang::CompilerInstance &CI, bool *Success) : CTU(CI), Success(Success) {} - 
void HandleTranslationUnit(ASTContext &Ctx) { + void HandleTranslationUnit(ASTContext &Ctx) override { auto FindFInTU = [](const TranslationUnitDecl *TU) { const FunctionDecl *FD = nullptr; for (const Decl *D : TU->decls()) { From 176a6e7abe33d58a65ce9cfac15fe320962e7b6e Mon Sep 17 00:00:00 2001 From: Kuba Mracek Date: Fri, 17 Jul 2020 17:39:43 -0700 Subject: [PATCH 696/771] [asan] Use dynamic shadow memory position on Apple Silicon macOS This is needed because macOS on Apple Silicon has some reserved pages inside the "regular" shadow memory location, and mapping over that location fails. Differential Revision: https://reviews.llvm.org/D82912 --- compiler-rt/lib/asan/asan_mapping.h | 2 ++ llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp | 3 +++ .../Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll | 4 +++- 3 files changed, 8 insertions(+), 1 deletion(-) diff --git a/compiler-rt/lib/asan/asan_mapping.h b/compiler-rt/lib/asan/asan_mapping.h index 36e2cb5364f3a..27598171fc29b 100644 --- a/compiler-rt/lib/asan/asan_mapping.h +++ b/compiler-rt/lib/asan/asan_mapping.h @@ -206,6 +206,8 @@ static const u64 kMyriadCacheBitMask32 = 0x40000000ULL; #else # if SANITIZER_IOS # define SHADOW_OFFSET __asan_shadow_memory_dynamic_address +# elif SANITIZER_MAC && defined(__aarch64__) +# define SHADOW_OFFSET __asan_shadow_memory_dynamic_address # elif defined(__aarch64__) # define SHADOW_OFFSET kAArch64_ShadowOffset64 # elif defined(__powerpc64__) diff --git a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 7516a64c6a354..647d25e6a24e0 100644 --- a/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -434,6 +434,7 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, bool IsKasan) { bool IsAndroid = TargetTriple.isAndroid(); bool IsIOS = TargetTriple.isiOS() || TargetTriple.isWatchOS(); + bool IsMacOS = 
TargetTriple.isMacOSX(); bool IsFreeBSD = TargetTriple.isOSFreeBSD(); bool IsNetBSD = TargetTriple.isOSNetBSD(); bool IsPS4CPU = TargetTriple.isPS4CPU(); @@ -510,6 +511,8 @@ static ShadowMapping getShadowMapping(Triple &TargetTriple, int LongSize, Mapping.Offset = kMIPS64_ShadowOffset64; else if (IsIOS) Mapping.Offset = kDynamicShadowSentinel; + else if (IsMacOS && IsAArch64) + Mapping.Offset = kDynamicShadowSentinel; else if (IsAArch64) Mapping.Offset = kAArch64_ShadowOffset64; else diff --git a/llvm/test/Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll b/llvm/test/Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll index 391693c2c6f62..7d6b84144e9f8 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll @@ -8,8 +8,10 @@ ; RUN: opt -asan -asan-module -mtriple=i386-apple-ios-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 ; RUN: opt -asan -asan-module -mtriple=x86_64-apple-ios-simulator --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 ; -; // macOS does not use dynamic shadow placement +; // macOS does not use dynamic shadow placement on x86_64 ; RUN: opt -asan -asan-module -mtriple=x86_64-apple-macosx --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NONDYNAMIC -DPTR_SIZE=64 +; // macOS does use dynamic shadow placement on arm64 +; RUN: opt -asan -asan-module -mtriple=arm64-apple-macosx --data-layout="e-m:o-i64:64-i128:128-n32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 define i32 @test_load(i32* %a) sanitize_address { ; First instrumentation in the function must be to load the dynamic shadow From 3b55bfad2a3b7dba8815e043fcd9a2c0cb7b2987 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: 
Fri, 17 Jul 2020 14:56:51 -0700 Subject: [PATCH 697/771] [llvm-jitlink] Add support for testing GOT entries and stubs for ELF. This enables regression testing of GOT and stub handling with llvm-jitlink. --- llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp | 71 ++++++++++++++++++- .../tools/llvm-jitlink/llvm-jitlink-macho.cpp | 6 -- 2 files changed, 70 insertions(+), 7 deletions(-) diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp index 1b74f1016ae99..beb73fb8edf71 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink-elf.cpp @@ -20,6 +20,50 @@ using namespace llvm; using namespace llvm::jitlink; +static bool isELFGOTSection(Section &S) { return S.getName() == "$__GOT"; } + +static bool isELFStubsSection(Section &S) { return S.getName() == "$__STUBS"; } + +static Expected getFirstRelocationEdge(LinkGraph &G, Block &B) { + auto EItr = std::find_if(B.edges().begin(), B.edges().end(), + [](Edge &E) { return E.isRelocation(); }); + if (EItr == B.edges().end()) + return make_error("GOT entry in " + G.getName() + ", \"" + + B.getSection().getName() + + "\" has no relocations", + inconvertibleErrorCode()); + return *EItr; +} + +static Expected getELFGOTTarget(LinkGraph &G, Block &B) { + auto E = getFirstRelocationEdge(G, B); + if (!E) + return E.takeError(); + auto &TargetSym = E->getTarget(); + if (!TargetSym.hasName()) + return make_error( + "GOT entry in " + G.getName() + ", \"" + + TargetSym.getBlock().getSection().getName() + + "\" points to anonymous " + "symbol", + inconvertibleErrorCode()); + return TargetSym; +} + +static Expected getELFStubTarget(LinkGraph &G, Block &B) { + auto E = getFirstRelocationEdge(G, B); + if (!E) + return E.takeError(); + auto &GOTSym = E->getTarget(); + if (!GOTSym.isDefined() || !isELFGOTSection(GOTSym.getBlock().getSection())) + return make_error( + "Stubs entry in " + G.getName() + ", \"" + + GOTSym.getBlock().getSection().getName() + + 
"\" does not point to GOT entry", + inconvertibleErrorCode()); + return getELFGOTTarget(G, GOTSym.getBlock()); +} + namespace llvm { Error registerELFGraphInfo(Session &S, LinkGraph &G) { @@ -53,6 +97,9 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { "\"", inconvertibleErrorCode()); + bool isGOTSection = isELFGOTSection(Sec); + bool isStubsSection = isELFStubsSection(Sec); + bool SectionContainsContent = false; bool SectionContainsZeroFill = false; @@ -64,7 +111,29 @@ Error registerELFGraphInfo(Session &S, LinkGraph &G) { if (Sym->getAddress() > LastSym->getAddress()) LastSym = Sym; - if (Sym->hasName()) { + if (isGOTSection) { + if (Sym->isSymbolZeroFill()) + return make_error("zero-fill atom in GOT section", + inconvertibleErrorCode()); + + if (auto TS = getELFGOTTarget(G, Sym->getBlock())) + FileInfo.GOTEntryInfos[TS->getName()] = {Sym->getSymbolContent(), + Sym->getAddress()}; + else + return TS.takeError(); + SectionContainsContent = true; + } else if (isStubsSection) { + if (Sym->isSymbolZeroFill()) + return make_error("zero-fill atom in Stub section", + inconvertibleErrorCode()); + + if (auto TS = getELFStubTarget(G, Sym->getBlock())) + FileInfo.StubInfos[TS->getName()] = {Sym->getSymbolContent(), + Sym->getAddress()}; + else + return TS.takeError(); + SectionContainsContent = true; + } else if (Sym->hasName()) { dbgs() << "Symbol: " << Sym->getName() << "\n"; if (Sym->isSymbolZeroFill()) { S.SymbolInfos[Sym->getName()] = {Sym->getSize(), Sym->getAddress()}; diff --git a/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp b/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp index 18584e55d0f5c..fc70934ea1d59 100644 --- a/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp +++ b/llvm/tools/llvm-jitlink/llvm-jitlink-macho.cpp @@ -49,12 +49,6 @@ static Expected getMachOGOTTarget(LinkGraph &G, Block &B) { "\" points to anonymous " "symbol", inconvertibleErrorCode()); - if (TargetSym.isDefined() || TargetSym.isAbsolute()) - return make_error( - "GOT entry \"" + 
TargetSym.getName() + "\" in " + G.getName() + ", \"" + - TargetSym.getBlock().getSection().getName() + - "\" does not point to an external symbol", - inconvertibleErrorCode()); return TargetSym; } From 7c2f2762695896aee5d3c5ebb3943f1a28a5ed3b Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Fri, 17 Jul 2020 17:49:46 -0700 Subject: [PATCH 698/771] [NewPM][ASan] Make ASan tests work under NPM Under NPM, the asan-globals-md analysis is required but cannot be run within the asan function pass due to module analyses not being able to run from a function pass. So this pins all tests using "-asan" to the legacy PM and adds a corresponding RUN line with -passes='require,function(asan)'. Now all tests in Instrumentation/AddressSanitizer pass when -enable-new-pm is by default on. Tests were automatically converted using the following python script and failures were manually fixed up. import sys for i in sys.argv: with open(i, 'r') as f: s = f.read() with open(i, 'w') as f: for l in s.splitlines(): if "RUN:" in l and ' -asan -asan-module ' in l and '\\' not in l: f.write(l.replace(' -asan -asan-module ', ' -asan -asan-module -enable-new-pm=0 ')) f.write('\n') f.write(l.replace(' -asan -asan-module ', " -passes='require,function(asan),module(asan-module)' ")) f.write('\n') elif "RUN:" in l and ' -asan ' in l and '\\' not in l: f.write(l.replace(' -asan ', ' -asan -enable-new-pm=0 ')) f.write('\n') f.write(l.replace(' -asan ', " -passes='require,function(asan)' ")) f.write('\n') else: f.write(l) f.write('\n') See https://bugs.llvm.org/show_bug.cgi?id=46611. 
Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D83921 --- .../AddressSanitizer/X86/asm_cpuid.ll | 3 ++- .../X86/asm_more_registers_than_available.ll | 3 ++- .../AddressSanitizer/X86/bug_11395.ll | 3 ++- .../adaptive_global_redzones.ll | 6 +++-- .../asan-detect-invalid-pointer-pair.ll | 12 ++++++--- .../asan-masked-load-store.ll | 16 ++++++++--- .../AddressSanitizer/asan-vs-gvn.ll | 3 ++- .../asan_address_space_attr.ll | 3 ++- .../AddressSanitizer/basic-msvc64.ll | 3 ++- .../AddressSanitizer/basic-myriad.ll | 3 ++- .../Instrumentation/AddressSanitizer/basic.ll | 9 +++---- .../AddressSanitizer/byval-args.ll | 3 ++- .../AddressSanitizer/debug-info-alloca.ll | 3 ++- .../AddressSanitizer/debug-info-global-var.ll | 3 ++- .../AddressSanitizer/debug_info.ll | 3 ++- .../debug_info_noninstrumented_alloca.ll | 6 +++-- .../debug_info_noninstrumented_alloca2.ll | 6 +++-- .../do-not-instrument-globals-darwin.ll | 3 ++- .../do-not-instrument-globals-linux.ll | 3 ++- .../do-not-instrument-internal-globals.ll | 3 ++- .../do-not-instrument-profiling-globals.ll | 3 ++- .../do-not-instrument-promotable-allocas.ll | 3 ++- .../do-not-instrument-sanitizers.ll | 3 ++- .../do-not-touch-comdat-global.ll | 3 ++- .../do-not-touch-odr-global.ll | 3 ++- .../do-not-touch-threadlocal.ll | 3 ++- .../AddressSanitizer/dynamic-shadow-darwin.ll | 27 ++++++++++++------- .../AddressSanitizer/experiment-call.ll | 3 ++- .../AddressSanitizer/experiment.ll | 3 ++- .../AddressSanitizer/force-dynamic-shadow.ll | 6 +++-- .../AddressSanitizer/freebsd.ll | 21 ++++++++++++--- .../AddressSanitizer/global_addrspace.ll | 3 ++- .../AddressSanitizer/global_cstring_darwin.ll | 3 ++- .../AddressSanitizer/global_lto_merge.ll | 6 +++-- .../AddressSanitizer/global_metadata.ll | 6 +++-- .../AddressSanitizer/global_metadata_array.ll | 12 ++++++--- .../global_metadata_bitcasts.ll | 3 ++- .../global_metadata_darwin.ll | 3 ++- .../global_metadata_external_comdat.ll | 3 ++- .../global_metadata_windows.ll 
| 3 ++- .../hoist-argument-init-insts.ll | 3 ++- .../instrument-dynamic-allocas.ll | 3 ++- .../AddressSanitizer/instrument-no-return.ll | 3 ++- .../AddressSanitizer/instrument-stack.ll | 6 +++-- .../AddressSanitizer/instrument_global.ll | 6 +++-- .../instrument_initializer_metadata.ll | 6 +++-- .../instrument_load_then_store.ll | 6 +++-- .../instrumentation-with-call-threshold.ll | 15 +++++++---- .../AddressSanitizer/keep_going.ll | 3 ++- .../AddressSanitizer/lifetime-throw.ll | 3 ++- .../AddressSanitizer/lifetime-uar-uas.ll | 12 ++++++--- .../AddressSanitizer/lifetime.ll | 6 +++-- .../AddressSanitizer/local_alias.ll | 12 ++++++--- .../AddressSanitizer/local_stack_base.ll | 3 ++- .../AddressSanitizer/localescape.ll | 6 +++-- .../AddressSanitizer/no-globals.ll | 3 ++- .../AddressSanitizer/odr-check-ignore.ll | 3 ++- .../Instrumentation/AddressSanitizer/ps4.ll | 3 ++- .../AddressSanitizer/scale-offset.ll | 9 ++++--- .../stack-poisoning-and-lifetime-be.ll | 6 +++-- .../stack-poisoning-and-lifetime.ll | 6 +++-- .../stack-poisoning-byval-args.ll | 12 ++++++--- .../AddressSanitizer/stack-poisoning.ll | 6 +++-- .../AddressSanitizer/stack_dynamic_alloca.ll | 4 +++ .../AddressSanitizer/stack_layout.ll | 8 ++++-- .../AddressSanitizer/str-nobuiltin.ll | 3 ++- .../AddressSanitizer/test64.ll | 6 +++-- .../Instrumentation/AddressSanitizer/twice.ll | 3 ++- .../Instrumentation/AddressSanitizer/ubsan.ll | 3 ++- .../AddressSanitizer/win-sorted-sections.ll | 3 ++- .../AddressSanitizer/win-string-literal.ll | 3 ++- .../AddressSanitizer/with-ifunc.ll | 24 ++++++++++++----- llvm/tools/opt/NewPMDriver.cpp | 20 ++++++++++++++ 73 files changed, 305 insertions(+), 130 deletions(-) diff --git a/llvm/test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll b/llvm/test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll index cfa91d4da633d..c842f1963d691 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/X86/asm_cpuid.ll @@ 
-1,4 +1,5 @@ -; RUN: opt < %s -asan -S -o %t.ll +; RUN: opt < %s -asan -enable-new-pm=0 -S -o %t.ll +; RUN: opt < %s -passes='asan-function-pipeline' -S -o %t.ll ; RUN: FileCheck %s < %t.ll ; RUN: llc < %t.ll | FileCheck %s --check-prefix=ASM diff --git a/llvm/test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll b/llvm/test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll index 7827f3fbf278a..191917581b748 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/X86/asm_more_registers_than_available.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -S -o %t.ll +; RUN: opt < %s -asan -enable-new-pm=0 -S -o %t.ll +; RUN: opt < %s -passes='asan-function-pipeline' -S -o %t.ll ; RUN: FileCheck %s < %t.ll ; Don't do stack malloc on functions containing inline assembly on 64-bit diff --git a/llvm/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll b/llvm/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll index 027148a0acd6d..bf9cc11a2b903 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/X86/bug_11395.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S | llc -o /dev/null +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | llc -o /dev/null +; RUN: opt < %s -passes='asan-pipeline' -S | llc -o /dev/null ; The bug manifests as a reg alloc failure: ; error: ran out of registers during register allocation ; ModuleID = 'z.o' diff --git a/llvm/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll b/llvm/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll index 2c8df25e53c1c..749f4ad220158 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/adaptive_global_redzones.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s -; RUN: opt < %s 
-asan -asan-module -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-scale=5 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-detect-invalid-pointer-pair.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-detect-invalid-pointer-pair.ll index 3df73e54803ff..5d746c3053c68 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan-detect-invalid-pointer-pair.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-detect-invalid-pointer-pair.ll @@ -1,8 +1,14 @@ -; RUN: opt < %s -asan -asan-detect-invalid-pointer-cmp -S \ +; RUN: opt < %s -asan -asan-detect-invalid-pointer-cmp -S -enable-new-pm=0 \ ; RUN: | FileCheck %s --check-prefixes=CMP,NOSUB,ALL -; RUN: opt < %s -asan -asan-detect-invalid-pointer-sub -S \ +; RUN: opt < %s -passes='asan-function-pipeline' -asan-detect-invalid-pointer-cmp -S \ +; RUN: | FileCheck %s --check-prefixes=CMP,NOSUB,ALL +; RUN: opt < %s -asan -asan-detect-invalid-pointer-sub -S -enable-new-pm=0 \ +; RUN: | FileCheck %s --check-prefixes=SUB,NOCMP,ALL +; RUN: opt < %s -passes='asan-function-pipeline' -asan-detect-invalid-pointer-sub -S \ ; RUN: | FileCheck %s --check-prefixes=SUB,NOCMP,ALL -; RUN: opt < %s -asan -asan-detect-invalid-pointer-pair -S \ +; RUN: opt < %s -asan -asan-detect-invalid-pointer-pair -S -enable-new-pm=0 \ +; RUN: | FileCheck %s --check-prefixes=CMP,SUB,ALL +; RUN: opt < %s -passes='asan-function-pipeline' -asan-detect-invalid-pointer-pair -S \ ; RUN: | FileCheck %s --check-prefixes=CMP,SUB,ALL ; Support instrumentation 
of invalid pointer pair detection. diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll index 1b069255880a9..7a3b69bac2f26 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-masked-load-store.ll @@ -1,10 +1,18 @@ -; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -S \ +; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -S -enable-new-pm=0 \ ; RUN: | FileCheck %s -check-prefix=LOAD -check-prefix=STORE -check-prefix=ALL -; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -S \ +; RUN: opt < %s -passes='asan-function-pipeline' -asan-instrumentation-with-call-threshold=0 -S \ +; RUN: | FileCheck %s -check-prefix=LOAD -check-prefix=STORE -check-prefix=ALL +; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -S -enable-new-pm=0 \ +; RUN: | FileCheck %s -check-prefix=NOLOAD -check-prefix=STORE -check-prefix=ALL +; RUN: opt < %s -passes='asan-function-pipeline' -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -S \ ; RUN: | FileCheck %s -check-prefix=NOLOAD -check-prefix=STORE -check-prefix=ALL -; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-writes=0 -S \ +; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-writes=0 -S -enable-new-pm=0 \ ; RUN: | FileCheck %s -check-prefix=LOAD -check-prefix=NOSTORE -check-prefix=ALL -; RUN: opt < %s -asan -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S \ +; RUN: opt < %s -passes='asan-function-pipeline' -asan-instrumentation-with-call-threshold=0 -asan-instrument-writes=0 -S \ +; RUN: | FileCheck %s -check-prefix=LOAD -check-prefix=NOSTORE -check-prefix=ALL +; RUN: opt < %s -asan 
-asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S -enable-new-pm=0 \ +; RUN: | FileCheck %s -check-prefix=NOLOAD -check-prefix=NOSTORE -check-prefix=ALL +; RUN: opt < %s -passes='asan-function-pipeline' -asan-instrumentation-with-call-threshold=0 -asan-instrument-reads=0 -asan-instrument-writes=0 -S \ ; RUN: | FileCheck %s -check-prefix=NOLOAD -check-prefix=NOSTORE -check-prefix=ALL ; Support ASan instrumentation for constant-mask llvm.masked.{load,store} diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll b/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll index f9cfa7af19f62..39e705354fe71 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan-vs-gvn.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -basic-aa -gvn -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -basic-aa -gvn -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s "-passes=function(require,gvn),asan-pipeline" -S | FileCheck %s ; ASAN conflicts with load widening iff the widened load accesses data out of bounds ; (while the original unwidened loads do not). 
; https://github.com/google/sanitizers/issues/20#issuecomment-136381262 diff --git a/llvm/test/Instrumentation/AddressSanitizer/asan_address_space_attr.ll b/llvm/test/Instrumentation/AddressSanitizer/asan_address_space_attr.ll index 87d72bbe142f7..35744a810ccc7 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/asan_address_space_attr.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/asan_address_space_attr.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/basic-msvc64.ll b/llvm/test/Instrumentation/AddressSanitizer/basic-msvc64.ll index 2d59b31ef917b..2019011581de9 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/basic-msvc64.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/basic-msvc64.ll @@ -1,6 +1,7 @@ ; Test basic address sanitizer instrumentation. ; -; RUN: opt -asan -asan-module -S < %s | FileCheck %s +; RUN: opt -asan -asan-module -enable-new-pm=0 -S < %s | FileCheck %s +; RUN: opt -passes='asan-pipeline' -S < %s | FileCheck %s target triple = "x86_64-pc-windows-msvc" ; CHECK: @llvm.global_ctors = {{.*}}@asan.module_ctor diff --git a/llvm/test/Instrumentation/AddressSanitizer/basic-myriad.ll b/llvm/test/Instrumentation/AddressSanitizer/basic-myriad.ll index 9ece86e66f923..fb234ff0014f4 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/basic-myriad.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/basic-myriad.ll @@ -1,6 +1,7 @@ ; Test basic address sanitizer instrumentation for Myriad. 
; -; RUN: opt -asan -asan-module -S < %s | FileCheck %s +; RUN: opt -asan -asan-module -enable-new-pm=0 -S < %s | FileCheck %s +; RUN: opt -passes='asan-pipeline' -S < %s | FileCheck %s target triple = "sparc-myriad-rtems" target datalayout = "E-m:e-p:32:32-i64:64-f128:64-n32-S64" diff --git a/llvm/test/Instrumentation/AddressSanitizer/basic.ll b/llvm/test/Instrumentation/AddressSanitizer/basic.ll index 6397338344d6d..32462bb730c6a 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/basic.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/basic.ll @@ -1,11 +1,10 @@ ; Test basic address sanitizer instrumentation. ; -; RUN: opt < %s -asan -asan-module -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -asan -asan-module -asan-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -asan -asan-module -S -enable-new-pm=0 | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -asan -asan-module -asan-mapping-scale=5 -S -enable-new-pm=0 | FileCheck --check-prefixes=CHECK,CHECK-S5 %s -; We need the requires since both asan and asan-module require reading module level metadata which is done once by the asan-globals-md analysis -; RUN: opt < %s -passes='require,function(asan),module(asan-module)' -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -passes='require,function(asan),module(asan-module)' -asan-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/byval-args.ll b/llvm/test/Instrumentation/AddressSanitizer/byval-args.ll 
index a070cedca37d9..e2d2464013a06 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/byval-args.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/byval-args.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -S | FileCheck %s ; Test that for call instructions, the by-value arguments are instrumented. target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug-info-alloca.ll b/llvm/test/Instrumentation/AddressSanitizer/debug-info-alloca.ll index 48cda7d7f48c2..2fe3790af558f 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug-info-alloca.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug-info-alloca.ll @@ -3,7 +3,8 @@ ; first instruction. Breaking on the instrumented function in a debugger ; would then stop at that instruction, before the prologue is finished. 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s ; 1: void f(int *arg) { ; 2: } ; 3: int main(int argc, char **argv) { diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug-info-global-var.ll b/llvm/test/Instrumentation/AddressSanitizer/debug-info-global-var.ll index 959693e086fac..50bba89f88898 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug-info-global-var.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug-info-global-var.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s source_filename = "version.c" target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.12.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll b/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll index c0389daddacd4..ce0126a08c19b 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug_info.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=0 -S | FileCheck %s ; Checks that llvm.dbg.declare instructions are updated ; accordingly as we merge allocas. 
diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll index 911ef6de32db1..745ef165ecb0a 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca.ll @@ -2,8 +2,10 @@ ; Only first-basic-block allocas are considered stack slots, and moving them ; breaks debug info. -; RUN: opt < %s -asan -asan-module -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-instrument-dynamic-allocas -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrument-dynamic-allocas -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-instrument-dynamic-allocas -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca2.ll b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca2.ll index b6d393d0c33f0..69a4238342cd5 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca2.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/debug_info_noninstrumented_alloca2.ll @@ -1,7 +1,9 @@ ; Make sure we don't break the IR when moving non-instrumented allocas -; RUN: opt < %s -asan -asan-module -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-instrument-dynamic-allocas -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrument-dynamic-allocas -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' 
-asan-instrument-dynamic-allocas -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll index 7d15cd9537f17..3fc42e256b3bc 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-darwin.ll @@ -1,6 +1,7 @@ ; This test checks that we are not instrumenting unnecessary globals ; (llvm.metadata and other llvm internal globals). -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll index fc0e676ec1391..c946c6d4ac27d 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-globals-linux.ll @@ -1,6 +1,7 @@ ; This test checks that we are not instrumenting unnecessary globals ; (llvm.metadata, init_array sections, and other llvm internal globals). 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll index cff83ab718bbb..f93d3e2ff9223 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-internal-globals.ll @@ -1,6 +1,7 @@ ; This test checks that we are not instrumenting globals ; that we created ourselves. -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-profiling-globals.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-profiling-globals.ll index f20977fc98831..2d0b952ece2d4 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-profiling-globals.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-profiling-globals.ll @@ -1,5 +1,6 @@ ; This test checks that we don't instrument globals created by profiling passes. 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s @__profc_test = private global [1 x i64] zeroinitializer, section "__DATA,__llvm_prf_cnts", align 8 @__llvm_gcov_ctr = internal global [1 x i64] zeroinitializer diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll index 68913d321fe15..05b701731dff5 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-promotable-allocas.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -asan-instrument-dynamic-allocas -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrument-dynamic-allocas -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-instrument-dynamic-allocas -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-sanitizers.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-sanitizers.ll index b4407a8fc6df8..81b7ef7e0a37d 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-sanitizers.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-instrument-sanitizers.ll @@ -1,5 +1,6 @@ ; This test checks that we are not instrumenting sanitizer code. 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll index 24141ee2190c7..545adcc0d7c0c 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-comdat-global.ll @@ -1,6 +1,7 @@ ; This test checks that we instrument regular globals, but do not touch ; the COMDAT ones. -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32" target triple = "i686-pc-windows-msvc" ; no action should be taken for these globals diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll index bdcd6595a0f61..8379f634b2e13 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-odr-global.ll @@ -1,6 +1,7 @@ ; This test checks that we instrument regular globals, but do not touch ; the linkonce_odr ones. 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" ; no action should be taken for these globals diff --git a/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll b/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll index f863f44d51256..9b222452e1c5e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/do-not-touch-threadlocal.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" ; no action should be taken for thread locals diff --git a/llvm/test/Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll b/llvm/test/Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll index 7d6b84144e9f8..0e6b2c45d5108 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/dynamic-shadow-darwin.ll @@ -1,17 +1,26 @@ ; Test using dynamic shadow address on darwin ; -; RUN: opt -asan -asan-module -mtriple=arm64_32-apple-watchos --data-layout="e-m:o-p:32:32-i64:64-i128:128-n32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 -; RUN: opt -asan -asan-module -mtriple=armv7k-apple-watchos --data-layout="e-m:o-p:32:32-Fi8-i64:64-a:0:32-n32-S128" -S < %s | FileCheck %s 
--check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 -; RUN: opt -asan -asan-module -mtriple=arm64-apple-ios --data-layout="e-m:o-i64:64-i128:128-n32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 -; RUN: opt -asan -asan-module -mtriple=armv7s-apple-ios --data-layout="e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 -; RUN: opt -asan -asan-module -mtriple=i386-apple-watchos-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 -; RUN: opt -asan -asan-module -mtriple=i386-apple-ios-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 -; RUN: opt -asan -asan-module -mtriple=x86_64-apple-ios-simulator --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 +; RUN: opt -asan -asan-module -mtriple=arm64_32-apple-watchos --data-layout="e-m:o-p:32:32-i64:64-i128:128-n32:64-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -passes='asan-pipeline' -mtriple=arm64_32-apple-watchos --data-layout="e-m:o-p:32:32-i64:64-i128:128-n32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -asan -asan-module -mtriple=armv7k-apple-watchos --data-layout="e-m:o-p:32:32-Fi8-i64:64-a:0:32-n32-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -passes='asan-pipeline' -mtriple=armv7k-apple-watchos --data-layout="e-m:o-p:32:32-Fi8-i64:64-a:0:32-n32-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -asan -asan-module -mtriple=arm64-apple-ios --data-layout="e-m:o-i64:64-i128:128-n32:64-S128" -S < %s -enable-new-pm=0 | 
FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 +; RUN: opt -passes='asan-pipeline' -mtriple=arm64-apple-ios --data-layout="e-m:o-i64:64-i128:128-n32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 +; RUN: opt -asan -asan-module -mtriple=armv7s-apple-ios --data-layout="e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -passes='asan-pipeline' -mtriple=armv7s-apple-ios --data-layout="e-m:o-p:32:32-Fi8-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -asan -asan-module -mtriple=i386-apple-watchos-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -passes='asan-pipeline' -mtriple=i386-apple-watchos-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -asan -asan-module -mtriple=i386-apple-ios-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -passes='asan-pipeline' -mtriple=i386-apple-ios-simulator --data-layout="e-m:o-p:32:32-f64:32:64-f80:128-n8:16:32-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=32 +; RUN: opt -asan -asan-module -mtriple=x86_64-apple-ios-simulator --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 +; RUN: opt -passes='asan-pipeline' -mtriple=x86_64-apple-ios-simulator --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 ; ; // macOS does not use dynamic 
shadow placement on x86_64 -; RUN: opt -asan -asan-module -mtriple=x86_64-apple-macosx --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NONDYNAMIC -DPTR_SIZE=64 +; RUN: opt -asan -asan-module -mtriple=x86_64-apple-macosx --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-NONDYNAMIC -DPTR_SIZE=64 +; RUN: opt -passes='asan-pipeline' -mtriple=x86_64-apple-macosx --data-layout="e-m:o-i64:64-f80:128-n8:16:32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NONDYNAMIC -DPTR_SIZE=64 ; // macOS does use dynamic shadow placement on arm64 -; RUN: opt -asan -asan-module -mtriple=arm64-apple-macosx --data-layout="e-m:o-i64:64-i128:128-n32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 +; RUN: opt -asan -asan-module -mtriple=arm64-apple-macosx --data-layout="e-m:o-i64:64-i128:128-n32:64-S128" -S < %s -enable-new-pm=0 | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 +; RUN: opt -passes='asan-pipeline' -mtriple=arm64-apple-macosx --data-layout="e-m:o-i64:64-i128:128-n32:64-S128" -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC -DPTR_SIZE=64 define i32 @test_load(i32* %a) sanitize_address { ; First instrumentation in the function must be to load the dynamic shadow diff --git a/llvm/test/Instrumentation/AddressSanitizer/experiment-call.ll b/llvm/test/Instrumentation/AddressSanitizer/experiment-call.ll index 0e339cc8041a3..4215d54574132 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/experiment-call.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/experiment-call.ll @@ -1,6 +1,7 @@ ; Test optimization experiments. ; -asan-force-experiment flag turns all memory accesses into experiments. 
-; RUN: opt < %s -asan -asan-module -asan-force-experiment=42 -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-force-experiment=42 -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-force-experiment=42 -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/experiment.ll b/llvm/test/Instrumentation/AddressSanitizer/experiment.ll index aaa125f5d4086..274e0fdd136c3 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/experiment.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/experiment.ll @@ -1,6 +1,7 @@ ; Test optimization experiments. ; -asan-force-experiment flag turns all memory accesses into experiments. -; RUN: opt < %s -asan -asan-module -asan-force-experiment=42 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-force-experiment=42 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-force-experiment=42 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/force-dynamic-shadow.ll b/llvm/test/Instrumentation/AddressSanitizer/force-dynamic-shadow.ll index e364c23253f2f..70927ec2f0f75 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/force-dynamic-shadow.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/force-dynamic-shadow.ll @@ -1,7 +1,9 @@ ; Test -asan-force-dynamic-shadow flag. 
; -; RUN: opt -asan -asan-module -S -asan-force-dynamic-shadow=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FDS -; RUN: opt -asan -asan-module -S -asan-force-dynamic-shadow=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NDS +; RUN: opt -asan -asan-module -enable-new-pm=0 -S -asan-force-dynamic-shadow=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FDS +; RUN: opt -passes='asan-pipeline' -S -asan-force-dynamic-shadow=1 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-FDS +; RUN: opt -asan -asan-module -enable-new-pm=0 -S -asan-force-dynamic-shadow=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NDS +; RUN: opt -passes='asan-pipeline' -S -asan-force-dynamic-shadow=0 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-NDS target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/freebsd.ll b/llvm/test/Instrumentation/AddressSanitizer/freebsd.ll index 3fbbfa3cb1ea6..b9ec93ee266e0 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/freebsd.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/freebsd.ll @@ -1,14 +1,29 @@ -; RUN: opt < %s -asan -asan-module -S \ +; RUN: opt < %s -asan -asan-module -S -enable-new-pm=0 \ ; RUN: -mtriple=i386-unknown-freebsd \ ; RUN: -data-layout="e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" | \ ; RUN: FileCheck --check-prefix=CHECK-32 %s -; RUN: opt < %s -asan -asan-module -S \ +; RUN: opt < %s -passes='asan-pipeline' -S \ +; RUN: -mtriple=i386-unknown-freebsd \ +; RUN: -data-layout="e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128" | \ +; RUN: FileCheck --check-prefix=CHECK-32 %s + +; RUN: opt < %s -asan -asan-module -S -enable-new-pm=0 \ +; RUN: -mtriple=x86_64-unknown-freebsd \ +; RUN: -data-layout="e-m:e-i64:64-f80:128-n8:16:32:64-S128" | \ +; RUN: FileCheck --check-prefix=CHECK-64 %s + +; RUN: opt < %s -passes='asan-pipeline' -S \ ; RUN: -mtriple=x86_64-unknown-freebsd \ ; RUN: -data-layout="e-m:e-i64:64-f80:128-n8:16:32:64-S128" | \ ; RUN: FileCheck --check-prefix=CHECK-64 %s -; 
RUN: opt < %s -asan -asan-module -S \ +; RUN: opt < %s -asan -asan-module -S -enable-new-pm=0 \ +; RUN: -mtriple=mips64-unknown-freebsd \ +; RUN: -data-layout="E-m:e-i64:64-n32:64-S128" | \ +; RUN: FileCheck --check-prefix=CHECK-MIPS64 %s + +; RUN: opt < %s -passes='asan-pipeline' -S \ ; RUN: -mtriple=mips64-unknown-freebsd \ ; RUN: -data-layout="E-m:e-i64:64-n32:64-S128" | \ ; RUN: FileCheck --check-prefix=CHECK-MIPS64 %s diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_addrspace.ll b/llvm/test/Instrumentation/AddressSanitizer/global_addrspace.ll index 19b76e6512511..fbc7ae1f09a78 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_addrspace.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_addrspace.ll @@ -1,7 +1,8 @@ ; Only verify that asan don't crash on global variables of different ; address space. The global variable should be unmodified by asan. -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_cstring_darwin.ll b/llvm/test/Instrumentation/AddressSanitizer/global_cstring_darwin.ll index 0fc3205ba492c..71d83eccb334f 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_cstring_darwin.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_cstring_darwin.ll @@ -1,5 +1,6 @@ ; This test checks that instrumented global C (null terminated) strings are put into a special section on Darwin. 
-; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.10.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_lto_merge.ll b/llvm/test/Instrumentation/AddressSanitizer/global_lto_merge.ll index aa02553f4416a..304dfb6d0496e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_lto_merge.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_lto_merge.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -constmerge -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -constmerge -S | FileCheck %s +; RUN: opt < %s "-passes=asan-pipeline,constmerge" -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll index ea9f2cf3f1a9d..25033599b62d6 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s 
-passes='asan-pipeline' -asan-globals-live-support=1 -asan-mapping-scale=5 -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_array.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_array.ll index b1a600f3ceb8d..f5b9e4c2408dd 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_array.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_array.ll @@ -1,7 +1,11 @@ -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=0 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=0 -mtriple=x86_64-apple-macosx10.11.0 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=0 -mtriple=x86_64-pc-windows-msvc19.0.24215 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=0 -asan-mapping-scale=5 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=0 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=0 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=0 -mtriple=x86_64-apple-macosx10.11.0 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=0 -mtriple=x86_64-apple-macosx10.11.0 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=0 -mtriple=x86_64-pc-windows-msvc19.0.24215 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s 
+; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=0 -mtriple=x86_64-pc-windows-msvc19.0.24215 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=0 -asan-mapping-scale=5 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=0 -asan-mapping-scale=5 -mtriple=x86_64-unknown-linux-gnu -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_bitcasts.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_bitcasts.ll index 324a04e3b8321..3b4c8444feaf0 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_bitcasts.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_bitcasts.ll @@ -1,7 +1,8 @@ ; Test that the compiler doesn't crash when the llvm.asan.globals containts ; an entry that points to a BitCast instruction. -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -S +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=1 -S target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll index 1723b33636226..2790ff6fc7499 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_darwin.ll @@ -2,7 +2,8 @@ ; allowing dead stripping to be performed, and that the appropriate runtime ; routines are invoked. 
-; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=1 -S | FileCheck %s target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.11.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_external_comdat.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_external_comdat.ll index 29725adcd039a..30f2f12f33512 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_external_comdat.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_external_comdat.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -mtriple=x86_64-linux -asan -asan-module -asan-globals-live-support=0 -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-linux -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=0 -S | FileCheck %s +; RUN: opt < %s -mtriple=x86_64-linux -passes='asan-pipeline' -asan-globals-live-support=0 -S | FileCheck %s $my_var = comdat any diff --git a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_windows.ll b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_windows.ll index 744366e4c3766..628f32df36219 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/global_metadata_windows.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/global_metadata_windows.ll @@ -4,7 +4,8 @@ ; FIXME: Later we can use this to instrument linkonce odr string literals. 
-; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=1 -S | FileCheck %s target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-windows-msvc19.0.24215" diff --git a/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll b/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll index 1414b2122d983..821bfc86a0e5b 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/hoist-argument-init-insts.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -asan-use-after-return -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return -S | FileCheck %s ; Source (-O0 -fsanitize=address -fsanitize-address-use-after-scope): ;; struct S { int x, y; }; diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll index 6fc52bb66ded4..434e4be4e8e6d 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument-dynamic-allocas.ll @@ -1,7 +1,8 @@ ; Test asan internal compiler flags: ; -asan-instrument-dynamic-allocas -; RUN: opt < %s -asan -asan-module -asan-instrument-dynamic-allocas -S | FileCheck %s --check-prefix=CHECK-ALLOCA +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrument-dynamic-allocas -S | FileCheck %s --check-prefix=CHECK-ALLOCA +; RUN: opt < %s -passes='asan-pipeline' -asan-instrument-dynamic-allocas -S | FileCheck %s --check-prefix=CHECK-ALLOCA target datalayout = 
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument-no-return.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument-no-return.ll index 22ee66301de25..b255a15411ed1 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrument-no-return.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument-no-return.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -S | FileCheck %s ; AddressSanitizer must insert __asan_handle_no_return ; before noreturn calls that aren't inserted by sanitizers. diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument-stack.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument-stack.ll index f33481112f872..e563f702749b8 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrument-stack.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument-stack.ll @@ -1,6 +1,8 @@ ; This test checks that we are not instrumenting direct inbound stack accesses. 
-; RUN: opt < %s -asan -asan-module -asan-opt-stack -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-opt-stack -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-opt-stack -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-opt-stack -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-opt-stack -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-opt-stack -asan-mapping-scale=5 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument_global.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument_global.ll index 5631572d93da2..18c40a503b5c5 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrument_global.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument_global.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-globals-live-support=1 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-globals-live-support=1 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-globals-live-support=1 -asan-mapping-scale=5 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" @xxx = global i32 0, align 4 diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll index d392662efc711..ae043dc0c2c59 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument_initializer_metadata.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-scale=5 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" @xxx = internal global i32 0, align 4 ; With dynamic initializer. diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll b/llvm/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll index 8341697ff48c9..ea350dab4e3b8 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrument_load_then_store.ll @@ -1,6 +1,8 @@ ; Test that AddressSanitizer instruments "(*a)++" only once. 
-; RUN: opt < %s -asan -asan-module -S -asan-opt=1 | FileCheck %s -check-prefix=OPT1 -; RUN: opt < %s -asan -asan-module -S -asan-opt=0 | FileCheck %s -check-prefix=OPT0 +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S -asan-opt=1 | FileCheck %s -check-prefix=OPT1 +; RUN: opt < %s -passes='asan-pipeline' -S -asan-opt=1 | FileCheck %s -check-prefix=OPT1 +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S -asan-opt=0 | FileCheck %s -check-prefix=OPT0 +; RUN: opt < %s -passes='asan-pipeline' -S -asan-opt=0 | FileCheck %s -check-prefix=OPT0 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll b/llvm/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll index 8e0275d2c17de..82a61aabea4d9 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/instrumentation-with-call-threshold.ll @@ -2,11 +2,16 @@ ; -asan-instrumentation-with-call-threshold ; -asan-memory-access-callback-prefix -; RUN: opt < %s -asan -asan-module -asan-instrumentation-with-call-threshold=1 -S | FileCheck %s --check-prefix=CHECK-CALL -; RUN: opt < %s -asan -asan-module -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s --check-prefix=CHECK-CALL -; RUN: opt < %s -asan -asan-module -asan-instrumentation-with-call-threshold=0 -asan-memory-access-callback-prefix=__foo_ -S | FileCheck %s --check-prefix=CHECK-CUSTOM-PREFIX -; RUN: opt < %s -asan -asan-module -asan-instrumentation-with-call-threshold=5 -S | FileCheck %s --check-prefix=CHECK-INLINE -; RUN: opt < %s -asan -asan-module -S | FileCheck %s --check-prefix=CHECK-INLINE +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrumentation-with-call-threshold=1 -S | 
FileCheck %s --check-prefix=CHECK-CALL +; RUN: opt < %s -passes='asan-pipeline' -asan-instrumentation-with-call-threshold=1 -S | FileCheck %s --check-prefix=CHECK-CALL +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s --check-prefix=CHECK-CALL +; RUN: opt < %s -passes='asan-pipeline' -asan-instrumentation-with-call-threshold=0 -S | FileCheck %s --check-prefix=CHECK-CALL +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrumentation-with-call-threshold=0 -asan-memory-access-callback-prefix=__foo_ -S | FileCheck %s --check-prefix=CHECK-CUSTOM-PREFIX +; RUN: opt < %s -passes='asan-pipeline' -asan-instrumentation-with-call-threshold=0 -asan-memory-access-callback-prefix=__foo_ -S | FileCheck %s --check-prefix=CHECK-CUSTOM-PREFIX +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-instrumentation-with-call-threshold=5 -S | FileCheck %s --check-prefix=CHECK-INLINE +; RUN: opt < %s -passes='asan-pipeline' -asan-instrumentation-with-call-threshold=5 -S | FileCheck %s --check-prefix=CHECK-INLINE +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s --check-prefix=CHECK-INLINE +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s --check-prefix=CHECK-INLINE target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/keep_going.ll b/llvm/test/Instrumentation/AddressSanitizer/keep_going.ll index 4bb59e74e8f14..7a3fbc39ff271 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/keep_going.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/keep_going.ll @@ -1,7 +1,8 @@ ; Test asan internal compiler flags: ; -asan-recover=1 -; RUN: opt < %s -asan -asan-recover -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -asan-recover -asan-module -S | FileCheck %s 
+; RUN: opt < %s -passes='asan-pipeline' -asan-recover -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/lifetime-throw.ll b/llvm/test/Instrumentation/AddressSanitizer/lifetime-throw.ll index ff03d10c7c5d7..64473fb59f76e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/lifetime-throw.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/lifetime-throw.ll @@ -1,5 +1,6 @@ ; Test handling of llvm.lifetime intrinsics with C++ exceptions. -; RUN: opt < %s -asan -asan-module -asan-use-after-scope -asan-use-after-return=0 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope -asan-use-after-return=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope -asan-use-after-return=0 -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/lifetime-uar-uas.ll b/llvm/test/Instrumentation/AddressSanitizer/lifetime-uar-uas.ll index 437b6a94185b8..136a8457fcf19 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/lifetime-uar-uas.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/lifetime-uar-uas.ll @@ -1,8 +1,12 @@ ; Test handling of llvm.lifetime intrinsics in UAR/UAS modes. 
-; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -asan-use-after-scope=0 -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-use-after-return=1 -asan-use-after-scope=0 -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -asan-use-after-scope=1 -S | FileCheck %s --check-prefix=CHECK-UAS -; RUN: opt < %s -asan -asan-module -asan-use-after-return=1 -asan-use-after-scope=1 -S | FileCheck %s --check-prefix=CHECK-UAS +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=0 -asan-use-after-scope=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=0 -asan-use-after-scope=0 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=1 -asan-use-after-scope=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=1 -asan-use-after-scope=0 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=0 -asan-use-after-scope=1 -S | FileCheck %s --check-prefix=CHECK-UAS +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=0 -asan-use-after-scope=1 -S | FileCheck %s --check-prefix=CHECK-UAS +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=1 -asan-use-after-scope=1 -S | FileCheck %s --check-prefix=CHECK-UAS +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=1 -asan-use-after-scope=1 -S | FileCheck %s --check-prefix=CHECK-UAS target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git a/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll b/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll index b951afdc670f1..26aa65715d262 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/lifetime.ll @@ -1,6 +1,8 @@ ; Test handling of llvm.lifetime intrinsics. 
-; RUN: opt < %s -asan -asan-module -asan-use-after-scope -asan-use-after-return=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT -; RUN: opt < %s -asan -asan-module -asan-use-after-scope -asan-use-after-return=0 -asan-instrument-dynamic-allocas=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-NO-DYNAMIC +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope -asan-use-after-return=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope -asan-use-after-return=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-DEFAULT +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope -asan-use-after-return=0 -asan-instrument-dynamic-allocas=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-NO-DYNAMIC +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope -asan-use-after-return=0 -asan-instrument-dynamic-allocas=0 -S | FileCheck %s --check-prefixes=CHECK,CHECK-NO-DYNAMIC target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll b/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll index 9b95bb3fa6b04..a4c5803fc8189 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/local_alias.ll @@ -1,7 +1,11 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s --check-prefixes=CHECK-NOALIAS,CHECK-NOINDICATOR -; RUN: opt < %s -asan -asan-module -asan-use-private-alias=1 -S | FileCheck %s --check-prefixes=CHECK-ALIAS,CHECK-NOINDICATOR -; RUN: opt < %s -asan -asan-module -asan-use-odr-indicator=1 -S | FileCheck %s --check-prefixes=CHECK-INDICATOR,CHECK-NOALIAS -; RUN: opt < %s -asan -asan-module -asan-use-private-alias=1 -asan-use-odr-indicator=1 -S | FileCheck %s 
--check-prefixes=CHECK-ALIAS,CHECK-INDICATOR +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s --check-prefixes=CHECK-NOALIAS,CHECK-NOINDICATOR +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s --check-prefixes=CHECK-NOALIAS,CHECK-NOINDICATOR +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-private-alias=1 -S | FileCheck %s --check-prefixes=CHECK-ALIAS,CHECK-NOINDICATOR +; RUN: opt < %s -passes='asan-pipeline' -asan-use-private-alias=1 -S | FileCheck %s --check-prefixes=CHECK-ALIAS,CHECK-NOINDICATOR +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-odr-indicator=1 -S | FileCheck %s --check-prefixes=CHECK-INDICATOR,CHECK-NOALIAS +; RUN: opt < %s -passes='asan-pipeline' -asan-use-odr-indicator=1 -S | FileCheck %s --check-prefixes=CHECK-INDICATOR,CHECK-NOALIAS +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-private-alias=1 -asan-use-odr-indicator=1 -S | FileCheck %s --check-prefixes=CHECK-ALIAS,CHECK-INDICATOR +; RUN: opt < %s -passes='asan-pipeline' -asan-use-private-alias=1 -asan-use-odr-indicator=1 -S | FileCheck %s --check-prefixes=CHECK-ALIAS,CHECK-INDICATOR target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll b/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll index 7bf294cb6b600..c9dec38d227bc 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/local_stack_base.ll @@ -1,4 +1,5 @@ -; RUN: opt -S -asan -asan-skip-promotable-allocas=0 %s -o - | FileCheck %s +; RUN: opt -S -asan -enable-new-pm=0 -asan-skip-promotable-allocas=0 %s -o - | FileCheck %s +; RUN: opt -S -passes='asan-function-pipeline' -asan-skip-promotable-allocas=0 %s -o - | FileCheck %s ; Generated from: ; int bar(int y) { ; return y + 2; diff --git a/llvm/test/Instrumentation/AddressSanitizer/localescape.ll 
b/llvm/test/Instrumentation/AddressSanitizer/localescape.ll index 015b0e84ff16c..8daeb2927f935 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/localescape.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/localescape.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -asan-use-after-return -asan-stack-dynamic-alloca -S | FileCheck %s -; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -asan-stack-dynamic-alloca=0 -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return -asan-stack-dynamic-alloca -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return -asan-stack-dynamic-alloca -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=0 -asan-stack-dynamic-alloca=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=0 -asan-stack-dynamic-alloca=0 -S | FileCheck %s target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32" target triple = "i686-pc-windows-msvc18.0.0" diff --git a/llvm/test/Instrumentation/AddressSanitizer/no-globals.ll b/llvm/test/Instrumentation/AddressSanitizer/no-globals.ll index 30388b1865eb5..ea84ac387a71f 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/no-globals.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/no-globals.ll @@ -1,5 +1,6 @@ ; A module with no asan-instrumented globals has no asan destructor, and has an asan constructor in a comdat. 
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -asan -asan-module -asan-with-comdat=1 -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -asan -asan-module -enable-new-pm=0 -asan-with-comdat=1 -asan-globals-live-support=1 -S | FileCheck %s +; RUN: opt -mtriple=x86_64-unknown-linux-gnu < %s -passes='asan-pipeline' -asan-with-comdat=1 -asan-globals-live-support=1 -S | FileCheck %s define void @f() { ret void diff --git a/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll b/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll index cf48d19c16c2d..09b3d2f519297 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/odr-check-ignore.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/ps4.ll b/llvm/test/Instrumentation/AddressSanitizer/ps4.ll index e160996866b4f..5930e31a4dd79 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/ps4.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/ps4.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S -mtriple=x86_64-scei-ps4 | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S -mtriple=x86_64-scei-ps4 | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S -mtriple=x86_64-scei-ps4 | FileCheck %s define i32 @read_4_bytes(i32* %a) sanitize_address { entry: diff --git a/llvm/test/Instrumentation/AddressSanitizer/scale-offset.ll b/llvm/test/Instrumentation/AddressSanitizer/scale-offset.ll index 8345586fec4e8..f0b8fb8fc8374 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/scale-offset.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/scale-offset.ll @@ 
-1,8 +1,11 @@ ; Test that the scale (-asan-mapping-scale) and offset (-asan-mapping-offset) command-line options work as expected ; -; RUN: opt < %s -asan -asan-module -asan-mapping-offset 0xdeadbeef -S | FileCheck --check-prefix=CHECK-OFFSET %s -; RUN: opt < %s -asan -asan-module -asan-mapping-scale 1 -S | FileCheck --check-prefix=CHECK-SCALE %s -; RUN: opt < %s -asan -asan-module -asan-mapping-offset 0xc0ffee -asan-mapping-scale 0 -S | FileCheck --check-prefix=CHECK-BOTH %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-mapping-offset 0xdeadbeef -S | FileCheck --check-prefix=CHECK-OFFSET %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-offset 0xdeadbeef -S | FileCheck --check-prefix=CHECK-OFFSET %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-mapping-scale 1 -S | FileCheck --check-prefix=CHECK-SCALE %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-scale 1 -S | FileCheck --check-prefix=CHECK-SCALE %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-mapping-offset 0xc0ffee -asan-mapping-scale 0 -S | FileCheck --check-prefix=CHECK-BOTH %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-offset 0xc0ffee -asan-mapping-scale 0 -S | FileCheck --check-prefix=CHECK-BOTH %s target triple = "x86_64-unknown-linux-gnu" define i32 @read_offset(i32* %a) sanitize_address { diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime-be.ll b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime-be.ll index 2261094275123..a9181fc4c3de9 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime-be.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime-be.ll @@ -1,8 +1,10 @@ ; Regular stack poisoning. 
-; RUN: opt < %s -asan -asan-module -asan-use-after-scope=0 -S | FileCheck --check-prefixes=CHECK,ENTRY,EXIT %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope=0 -S | FileCheck --check-prefixes=CHECK,ENTRY,EXIT %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope=0 -S | FileCheck --check-prefixes=CHECK,ENTRY,EXIT %s ; Stack poisoning with stack-use-after-scope. -; RUN: opt < %s -asan -asan-module -asan-use-after-scope=1 -S | FileCheck --check-prefixes=CHECK,ENTRY-UAS,EXIT-UAS %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope=1 -S | FileCheck --check-prefixes=CHECK,ENTRY-UAS,EXIT-UAS %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope=1 -S | FileCheck --check-prefixes=CHECK,ENTRY-UAS,EXIT-UAS %s target datalayout = "E-m:e-i64:64-n32:64" target triple = "powerpc64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime.ll b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime.ll index 5523da63d373b..54128bb0c9e4c 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-and-lifetime.ll @@ -1,8 +1,10 @@ ; Regular stack poisoning. -; RUN: opt < %s -asan -asan-module -asan-use-after-scope=0 -S | FileCheck --check-prefixes=CHECK,ENTRY,EXIT %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope=0 -S | FileCheck --check-prefixes=CHECK,ENTRY,EXIT %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope=0 -S | FileCheck --check-prefixes=CHECK,ENTRY,EXIT %s ; Stack poisoning with stack-use-after-scope. 
-; RUN: opt < %s -asan -asan-module -asan-use-after-scope=1 -S | FileCheck --check-prefixes=CHECK,ENTRY-UAS,EXIT-UAS %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-scope=1 -S | FileCheck --check-prefixes=CHECK,ENTRY-UAS,EXIT-UAS %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-scope=1 -S | FileCheck --check-prefixes=CHECK,ENTRY-UAS,EXIT-UAS %s target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll index 859404b12de2e..616c18ea09d60 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning-byval-args.ll @@ -1,8 +1,12 @@ ; This check verifies that arguments passed by value get redzones. -; RUN: opt < %s -asan -asan-realign-stack=32 -S | FileCheck %s -; RUN: opt < %s -asan -asan-realign-stack=32 -asan-force-dynamic-shadow -S | FileCheck %s -; RUN: opt < %s -asan -asan-realign-stack=32 -asan-mapping-scale=5 -S | FileCheck %s -; RUN: opt < %s -asan -asan-realign-stack=32 -asan-force-dynamic-shadow -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -asan-realign-stack=32 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -asan-realign-stack=32 -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -asan-realign-stack=32 -asan-force-dynamic-shadow -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -asan-realign-stack=32 -asan-force-dynamic-shadow -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -asan-realign-stack=32 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -asan-realign-stack=32 -asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -asan-realign-stack=32 -asan-force-dynamic-shadow 
-asan-mapping-scale=5 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -asan-realign-stack=32 -asan-force-dynamic-shadow -asan-mapping-scale=5 -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning.ll b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning.ll index 4e5c2958ddb8f..0505f9a1e0920 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack-poisoning.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -asan-use-after-return -S | FileCheck --check-prefix=CHECK-UAR %s -; RUN: opt < %s -asan -asan-module -asan-use-after-return=0 -S | FileCheck --check-prefix=CHECK-PLAIN %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return -S | FileCheck --check-prefix=CHECK-UAR %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return -S | FileCheck --check-prefix=CHECK-UAR %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-use-after-return=0 -S | FileCheck --check-prefix=CHECK-PLAIN %s +; RUN: opt < %s -passes='asan-pipeline' -asan-use-after-return=0 -S | FileCheck --check-prefix=CHECK-PLAIN %s target datalayout = "e-i64:64-f80:128-s:64-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll b/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll index 90ddd7786b9bf..6140ba6b7a80e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack_dynamic_alloca.ll @@ -1,6 +1,10 @@ ; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca \ +; RUN: -asan-use-after-return -S -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-stack-dynamic-alloca 
\ ; RUN: -asan-use-after-return -S | FileCheck %s ; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca -asan-mapping-scale=5 \ +; RUN: -asan-use-after-return -S -enable-new-pm=0 | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -asan-stack-dynamic-alloca -asan-mapping-scale=5 \ ; RUN: -asan-use-after-return -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/stack_layout.ll b/llvm/test/Instrumentation/AddressSanitizer/stack_layout.ll index 85169d523b685..58b6714c90533 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/stack_layout.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/stack_layout.ll @@ -1,8 +1,12 @@ ; Test the ASan's stack layout. ; More tests in tests/Transforms/Utils/ASanStackFrameLayoutTest.cpp -; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca=0 -asan-use-after-scope -S \ +; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca=0 -asan-use-after-scope -S -enable-new-pm=0 \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-STATIC -; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca=1 -asan-use-after-scope -S \ +; RUN: opt < %s -passes='asan-pipeline' -asan-stack-dynamic-alloca=0 -asan-use-after-scope -S \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-STATIC +; RUN: opt < %s -asan -asan-module -asan-stack-dynamic-alloca=1 -asan-use-after-scope -S -enable-new-pm=0 \ +; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC +; RUN: opt < %s -passes='asan-pipeline' -asan-stack-dynamic-alloca=1 -asan-use-after-scope -S \ ; RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-DYNAMIC target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/str-nobuiltin.ll b/llvm/test/Instrumentation/AddressSanitizer/str-nobuiltin.ll index dd0132d435bd2..446e7f6793ba6 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/str-nobuiltin.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/str-nobuiltin.ll @@ -1,6 +1,7 @@ ; Test marking string functions as nobuiltin in address sanitizer. ; -; RUN: opt < %s -asan -S | FileCheck %s +; RUN: opt < %s -asan -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-function-pipeline' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/test64.ll b/llvm/test/Instrumentation/AddressSanitizer/test64.ll index 4aab5310635fd..c6b190c861a1e 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/test64.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/test64.ll @@ -1,5 +1,7 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s -; RUN: opt < %s -asan -asan-module -asan-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck --check-prefixes=CHECK,CHECK-S3 %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -asan-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s +; RUN: opt < %s -passes='asan-pipeline' -asan-mapping-scale=5 -S | FileCheck --check-prefixes=CHECK,CHECK-S5 %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" define i32 @read_4_bytes(i32* %a) sanitize_address { diff --git 
a/llvm/test/Instrumentation/AddressSanitizer/twice.ll b/llvm/test/Instrumentation/AddressSanitizer/twice.ll index 9f7826f739521..4b5b64080dd29 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/twice.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/twice.ll @@ -1,5 +1,6 @@ ; Check that the address sanitizer pass can be reused -; RUN: opt < %s -S -run-twice -asan +; RUN: opt < %s -S -run-twice -asan -enable-new-pm=0 +; RUN: opt < %s -S -run-twice -passes='asan-function-pipeline' define void @foo(i64* %b) nounwind uwtable sanitize_address { entry: diff --git a/llvm/test/Instrumentation/AddressSanitizer/ubsan.ll b/llvm/test/Instrumentation/AddressSanitizer/ubsan.ll index 23b7ef4537c4f..41fab72ddbea9 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/ubsan.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/ubsan.ll @@ -1,6 +1,7 @@ ; ASan shouldn't instrument code added by UBSan. -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Instrumentation/AddressSanitizer/win-sorted-sections.ll b/llvm/test/Instrumentation/AddressSanitizer/win-sorted-sections.ll index 85f759cf740c1..a384186255f02 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/win-sorted-sections.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/win-sorted-sections.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s ; All of these globals should pass through uninstrumented because of their ; custom section name. 
The .CRT section is the standard way to register custom diff --git a/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll b/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll index 4d5126be87c60..3eb98508fec6b 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/win-string-literal.ll @@ -1,4 +1,5 @@ -; RUN: opt < %s -asan -asan-module -S | FileCheck %s +; RUN: opt < %s -asan -asan-module -enable-new-pm=0 -S | FileCheck %s +; RUN: opt < %s -passes='asan-pipeline' -S | FileCheck %s ; Generated like so: ; $ clang -S -emit-llvm -Xclang -disable-llvm-passes -fsanitize=address -O1 t.cpp -o t.ll diff --git a/llvm/test/Instrumentation/AddressSanitizer/with-ifunc.ll b/llvm/test/Instrumentation/AddressSanitizer/with-ifunc.ll index 4771a9674017b..ed5df2a2dfb05 100644 --- a/llvm/test/Instrumentation/AddressSanitizer/with-ifunc.ll +++ b/llvm/test/Instrumentation/AddressSanitizer/with-ifunc.ll @@ -1,18 +1,30 @@ ; Test -asan-with-ifunc flag. 
; -; RUN: opt -asan -asan-module -S -asan-with-ifunc=0 < %s | \ +; RUN: opt -asan -asan-module -S -asan-with-ifunc=0 < %s -enable-new-pm=0 | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC -; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 < %s | \ +; RUN: opt -passes='asan-pipeline' -S -asan-with-ifunc=0 < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC +; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 < %s -enable-new-pm=0 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC +; RUN: opt -passes='asan-pipeline' -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 < %s | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC -; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=1 < %s | \ +; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=1 < %s -enable-new-pm=0 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC-NOREMAT +; RUN: opt -passes='asan-pipeline' -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=1 < %s | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC-NOREMAT ; Pre-Lollipop Android does not support ifunc. 
-; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android20 < %s | \ +; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android20 < %s -enable-new-pm=0 | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC -; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android < %s | \ +; RUN: opt -passes='asan-pipeline' -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android20 < %s | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC -; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android21 < %s | \ +; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android < %s -enable-new-pm=0 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC +; RUN: opt -passes='asan-pipeline' -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android < %s | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-NOIFUNC +; RUN: opt -asan -asan-module -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android21 < %s -enable-new-pm=0 | \ +; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC +; RUN: opt -passes='asan-pipeline' -S -asan-with-ifunc=1 -asan-with-ifunc-suppress-remat=0 -mtriple=armv7-linux-android21 < %s | \ ; RUN: FileCheck %s --check-prefixes=CHECK,CHECK-IFUNC target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64" diff --git a/llvm/tools/opt/NewPMDriver.cpp b/llvm/tools/opt/NewPMDriver.cpp index b94c58decdda2..47c9cfc65e7ba 100644 --- a/llvm/tools/opt/NewPMDriver.cpp +++ b/llvm/tools/opt/NewPMDriver.cpp @@ -34,6 +34,7 @@ #include "llvm/Support/ToolOutputFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Transforms/IPO/ThinLTOBitcodeWriter.h" +#include 
"llvm/Transforms/Instrumentation/AddressSanitizer.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" #include "llvm/Transforms/Utils/Debugify.h" @@ -297,6 +298,25 @@ bool llvm::runPassPipeline(StringRef Arg0, Module &M, TargetMachine *TM, } return false; }); + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &MPM, + ArrayRef) { + if (Name == "asan-pipeline") { + MPM.addPass( + RequireAnalysisPass()); + MPM.addPass( + createModuleToFunctionPassAdaptor(AddressSanitizerPass())); + MPM.addPass(ModuleAddressSanitizerPass()); + return true; + } else if (Name == "asan-function-pipeline") { + MPM.addPass( + RequireAnalysisPass()); + MPM.addPass( + createModuleToFunctionPassAdaptor(AddressSanitizerPass())); + return true; + } + return false; + }); #define HANDLE_EXTENSION(Ext) \ get##Ext##PluginInfo().RegisterPassBuilderCallbacks(PB); From c12f11184682c55e10922665cea628332eb158eb Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Fri, 17 Jul 2020 18:29:47 -0700 Subject: [PATCH 699/771] Corrected __libfuzzer_is_present symbol inclusion for MSVC x86 32-bit The incorrect symbol will cause linking failures for 32-bit targets: clang_rt.fuzzer-i386.lib(FuzzerDriver.obj) : error LNK2001: unresolved external symbol __libfuzzer_is_present Verified no longer fails to link with this change for 32-bit and still succeeds for 64-bit MSVC. Reviewed By: vitalybuka Differential Revision: https://reviews.llvm.org/D83594 --- compiler-rt/lib/fuzzer/FuzzerDriver.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp index a847c76e292d7..00a33a413d2f3 100644 --- a/compiler-rt/lib/fuzzer/FuzzerDriver.cpp +++ b/compiler-rt/lib/fuzzer/FuzzerDriver.cpp @@ -33,7 +33,11 @@ // binary can test for its existence. 
#if LIBFUZZER_MSVC extern "C" void __libfuzzer_is_present() {} +#if defined(_M_IX86) || defined(__i386__) +#pragma comment(linker, "/include:___libfuzzer_is_present") +#else #pragma comment(linker, "/include:__libfuzzer_is_present") +#endif #else extern "C" __attribute__((used)) void __libfuzzer_is_present() {} #endif // LIBFUZZER_MSVC From cbf64b58345dd9c1f0032c4fce558ed2f1fd0fe4 Mon Sep 17 00:00:00 2001 From: "Joel E. Denny" Date: Fri, 17 Jul 2020 21:35:21 -0400 Subject: [PATCH 700/771] [OpenMP] Fix map clause for unused var: don't ignore it For example, without this patch: ``` $ cat test.c int main() { int x[3]; #pragma omp target map(tofrom:x[0:3]) #ifdef USE x[0] = 1 #endif ; return 0; } $ clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -S -emit-llvm test.c $ grep '^@.offload_maptypes' test.ll $ echo $? 1 $ clang -fopenmp -fopenmp-targets=nvptx64-nvidia-cuda -S -emit-llvm test.c \ -DUSE $ grep '^@.offload_maptypes' test.ll @.offload_maptypes = private unnamed_addr constant [1 x i64] [i64 35] ``` With this patch, both greps produce the same result. Reviewed By: ABataev Differential Revision: https://reviews.llvm.org/D83922 --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 63 +- clang/test/OpenMP/target_map_codegen.cpp | 1175 +++++++++++------ .../test/OpenMP/target_teams_map_codegen.cpp | 13 +- 3 files changed, 789 insertions(+), 462 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index 89f403f2c82f4..f6d36bd84385f 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -7977,7 +7977,10 @@ class MappableExprsHandler { /// CombinedInfo). Also, for each item that relates with a device pointer, a /// pair of the relevant declaration and index where it occurs is appended to /// the device pointers info array. 
- void generateAllInfo(MapCombinedInfoTy &CombinedInfo) const { + void generateAllInfo( + MapCombinedInfoTy &CombinedInfo, + const llvm::DenseSet> &SkipVarSet = + llvm::DenseSet>()) const { // We have to process the component lists that relate with the same // declaration in a single chunk so that we can generate the map flags // correctly. Therefore, we organize all lists in a map. @@ -7986,14 +7989,17 @@ class MappableExprsHandler { // Helper function to fill the information map for the different supported // clauses. auto &&InfoGen = - [&Info](const ValueDecl *D, - OMPClauseMappableExprCommon::MappableExprComponentListRef L, - OpenMPMapClauseKind MapType, - ArrayRef MapModifiers, - bool ReturnDevicePointer, bool IsImplicit, - const ValueDecl *Mapper, bool ForDeviceAddr = false) { + [&Info, &SkipVarSet]( + const ValueDecl *D, + OMPClauseMappableExprCommon::MappableExprComponentListRef L, + OpenMPMapClauseKind MapType, + ArrayRef MapModifiers, + bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper, + bool ForDeviceAddr = false) { const ValueDecl *VD = D ? cast(D->getCanonicalDecl()) : nullptr; + if (SkipVarSet.count(VD)) + return; Info[VD].emplace_back(L, MapType, MapModifiers, ReturnDevicePointer, IsImplicit, Mapper, ForDeviceAddr); }; @@ -8561,38 +8567,6 @@ class MappableExprsHandler { } } - /// Generate the base pointers, section pointers, sizes, map types, and - /// mappers associated with the declare target link variables (all included in - /// \a CombinedInfo). - void generateInfoForDeclareTargetLink(MapCombinedInfoTy &CombinedInfo) const { - assert(CurDir.is() && - "Expect a executable directive"); - const auto *CurExecDir = CurDir.get(); - // Map other list items in the map clause which are not captured variables - // but "declare target link" global variables. 
- for (const auto *C : CurExecDir->getClausesOfKind()) { - for (const auto L : C->component_lists()) { - if (!std::get<0>(L)) - continue; - const auto *VD = dyn_cast_or_null(std::get<0>(L)); - if (!VD) - continue; - llvm::Optional Res = - OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD); - if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() || - !Res || *Res != OMPDeclareTargetDeclAttr::MT_Link) - continue; - StructRangeInfoTy PartialStruct; - generateInfoForComponentList( - C->getMapType(), C->getMapTypeModifiers(), std::get<1>(L), - CombinedInfo, PartialStruct, /*IsFirstComponentList=*/true, - C->isImplicit()); - assert(!PartialStruct.Base.isValid() && - "No partial structs for declare target link expected."); - } - } - } - /// Generate the default map information for a given capture \a CI, /// record field declaration \a RI and captured value \a CV. void generateDefaultMapInfo(const CapturedStmt::Capture &CI, @@ -9521,6 +9495,7 @@ void CGOpenMPRuntime::emitTargetCall( // Get mappable expression information. MappableExprsHandler MEHandler(D, CGF); llvm::DenseMap LambdaPointers; + llvm::DenseSet> MappedVarSet; auto RI = CS.getCapturedRecordDecl()->field_begin(); auto CV = CapturedVars.begin(); @@ -9546,6 +9521,10 @@ void CGOpenMPRuntime::emitTargetCall( // If we have any information in the map clause, we use it, otherwise we // just do a default mapping. 
MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct); + if (!CI->capturesThis()) + MappedVarSet.insert(CI->getCapturedVar()); + else + MappedVarSet.insert(nullptr); if (CurInfo.BasePointers.empty()) MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo); // Generate correct mapping for variables captured by reference in @@ -9575,9 +9554,9 @@ void CGOpenMPRuntime::emitTargetCall( MEHandler.adjustMemberOfForLambdaCaptures( LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers, CombinedInfo.Types); - // Map other list items in the map clause which are not captured variables - // but "declare target link" global variables. - MEHandler.generateInfoForDeclareTargetLink(CombinedInfo); + // Map any list items in a map clause that were not captures because they + // weren't referenced within the construct. + MEHandler.generateAllInfo(CombinedInfo, MappedVarSet); TargetDataInfo Info; // Fill up the arrays and create the arguments. diff --git a/clang/test/OpenMP/target_map_codegen.cpp b/clang/test/OpenMP/target_map_codegen.cpp index 69d0fc3c5f30c..2eab004eeff2e 100644 --- a/clang/test/OpenMP/target_map_codegen.cpp +++ b/clang/test/OpenMP/target_map_codegen.cpp @@ -1307,12 +1307,26 @@ void implicit_maps_template_type_capture (int a){ #endif ///==========================================================================/// -// RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CK19 --check-prefix CK19-64 +// RUN: %clang_cc1 -DUSE -DCK19 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK19,CK19-64,CK19-USE +// RUN: %clang_cc1 -DUSE -DCK19 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: 
%clang_cc1 -DUSE -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK19,CK19-64,CK19-USE +// RUN: %clang_cc1 -DUSE -DCK19 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK19,CK19-32,CK19-USE +// RUN: %clang_cc1 -DUSE -DCK19 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -DUSE -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK19,CK19-32,CK19-USE + +// RUN: %clang_cc1 -DUSE -DCK19 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY18 %s +// RUN: %clang_cc1 -DUSE -DCK19 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -DUSE -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY18 %s +// RUN: %clang_cc1 -DUSE -DCK19 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY18 %s +// RUN: %clang_cc1 -DUSE -DCK19 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -DUSE -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 
-include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY18 %s + +// RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK19,CK19-64,CK19-NOUSE // RUN: %clang_cc1 -DCK19 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CK19 --check-prefix CK19-64 -// RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CK19 --check-prefix CK19-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK19,CK19-64,CK19-NOUSE +// RUN: %clang_cc1 -DCK19 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK19,CK19-32,CK19-NOUSE // RUN: %clang_cc1 -DCK19 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CK19 --check-prefix CK19-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | 
FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK19,CK19-32,CK19-NOUSE // RUN: %clang_cc1 -DCK19 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY18 %s // RUN: %clang_cc1 -DCK19 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s @@ -1320,6 +1334,8 @@ void implicit_maps_template_type_capture (int a){ // RUN: %clang_cc1 -DCK19 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY18 %s // RUN: %clang_cc1 -DCK19 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY18 %s + + // SIMD-ONLY18-NOT: {{__kmpc|__tgt}} #ifdef CK19 @@ -1388,29 +1404,40 @@ void implicit_maps_template_type_capture (int a){ // CK19: [[MTYPE15:@.+]] = private {{.*}}constant [1 x i64] [i64 34] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[MTYPE16:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 33] +// CK19-USE: [[MTYPE16:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 33] +// CK19-NOUSE: [[MTYPE16:@.+]] = private {{.*}}constant [1 x i64] [i64 33] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[SIZE17:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 240] -// CK19: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 34] +// CK19-USE: [[SIZE17:@.+]] = private {{.*}}constant [2 x i64] 
[i64 {{8|4}}, i64 240] +// CK19-USE: [[MTYPE17:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 34] +// CK19-NOUSE: [[SIZE17:@.+]] = private {{.*}}constant [1 x i64] [i64 240] +// CK19-NOUSE: [[MTYPE17:@.+]] = private {{.*}}constant [1 x i64] [i64 34] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[SIZE18:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 240] -// CK19: [[MTYPE18:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35] +// CK19-USE: [[SIZE18:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 240] +// CK19-USE: [[MTYPE18:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35] +// CK19-NOUSE: [[SIZE18:@.+]] = private {{.*}}constant [1 x i64] [i64 240] +// CK19-NOUSE: [[MTYPE18:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 32] +// CK19-USE: [[MTYPE19:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 32] +// CK19-NOUSE: [[MTYPE19:@.+]] = private {{.*}}constant [1 x i64] [i64 32] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[SIZE20:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 4] -// CK19: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 33] +// CK19-USE: [[SIZE20:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 4] +// CK19-USE: [[MTYPE20:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 33] +// CK19-NOUSE: [[SIZE20:@.+]] = private {{.*}}constant [1 x i64] [i64 4] +// CK19-NOUSE: [[MTYPE20:@.+]] = private {{.*}}constant [1 x i64] [i64 33] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[MTYPE21:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35] +// CK19-USE: 
[[MTYPE21:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35] +// CK19-NOUSE: [[MTYPE21:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[SIZE22:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 4] -// CK19: [[MTYPE22:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35] +// CK19-USE: [[SIZE22:@.+]] = private {{.*}}constant [2 x i64] [i64 {{8|4}}, i64 4] +// CK19-USE: [[MTYPE22:@.+]] = private {{.*}}constant [2 x i64] [i64 800, i64 35] +// CK19-NOUSE: [[SIZE22:@.+]] = private {{.*}}constant [1 x i64] [i64 4] +// CK19-NOUSE: [[MTYPE22:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 // CK19: [[SIZE23:@.+]] = private {{.*}}constant [1 x i64] [i64 4] @@ -1441,11 +1468,14 @@ void implicit_maps_template_type_capture (int a){ // CK19: [[MTYPE29:@.+]] = private {{.*}}constant [3 x i64] [i64 35, i64 16, i64 19] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[MTYPE30:@.+]] = private {{.*}}constant [4 x i64] [i64 800, i64 800, i64 800, i64 35] +// CK19-USE: [[MTYPE30:@.+]] = private {{.*}}constant [4 x i64] [i64 800, i64 800, i64 800, i64 35] +// CK19-NOUSE: [[MTYPE30:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[SIZE31:@.+]] = private {{.*}}constant [4 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 {{8|4}}, i64 40] -// CK19: [[MTYPE31:@.+]] = private {{.*}}constant [4 x i64] [i64 800, i64 800, i64 800, i64 35] +// CK19-USE: [[SIZE31:@.+]] = private {{.*}}constant [4 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 {{8|4}}, i64 40] +// CK19-USE: [[MTYPE31:@.+]] = private {{.*}}constant [4 x i64] [i64 800, i64 800, i64 800, i64 35] +// CK19-NOUSE: 
[[SIZE31:@.+]] = private {{.*}}constant [1 x i64] [i64 40] +// CK19-NOUSE: [[MTYPE31:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 // CK19: [[SIZE32:@.+]] = private {{.*}}constant [1 x i64] [i64 13728] @@ -1467,20 +1497,26 @@ void implicit_maps_template_type_capture (int a){ // CK19: [[MTYPE36:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[MTYPE37:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35] +// CK19-USE: [[MTYPE37:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35] +// CK19-NOUSE: [[MTYPE37:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[MTYPE38:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35] +// CK19-USE: [[MTYPE38:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35] +// CK19-NOUSE: [[MTYPE38:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[MTYPE39:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35] +// CK19-USE: [[MTYPE39:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35] +// CK19-NOUSE: [[MTYPE39:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[MTYPE40:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35] +// CK19-USE: [[MTYPE40:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35] +// CK19-NOUSE: [[MTYPE40:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19-LABEL: 
@.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 -// CK19: [[SIZE41:@.+]] = private {{.*}}constant [3 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 208] -// CK19: [[MTYPE41:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35] +// CK19-USE: [[SIZE41:@.+]] = private {{.*}}constant [3 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 208] +// CK19-USE: [[MTYPE41:@.+]] = private {{.*}}constant [3 x i64] [i64 800, i64 800, i64 35] +// CK19-NOUSE: [[SIZE41:@.+]] = private {{.*}}constant [1 x i64] [i64 208] +// CK19-NOUSE: [[MTYPE41:@.+]] = private {{.*}}constant [1 x i64] [i64 35] // CK19-LABEL: @.__omp_offloading_{{.*}}explicit_maps_single{{.*}}_l{{[0-9]+}}.region_id = weak constant i8 0 // CK19: [[SIZE42:@.+]] = private {{.*}}constant [3 x i64] [i64 {{8|4}}, i64 {{8|4}}, i64 104] @@ -1510,10 +1546,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]] // CK19-DAG: store i32* [[VAR0]], i32** [[CP0]] - // CK19: call void [[CALL00:@.+]](i32* {{[^,]+}}) + // CK19-USE: call void [[CALL00:@.+]](i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL00:@.+]]() #pragma omp target map(alloc:a) { +#ifdef USE ++a; +#endif } // Map of a scalar in nested region. @@ -1531,11 +1570,14 @@ void explicit_maps_single (int ii){ // CK19-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]] // CK19-DAG: store i32* [[VAR0]], i32** [[CP0]] - // CK19: call void [[CALL00n:@.+]](i32* {{[^,]+}}) + // CK19-USE: call void [[CALL00n:@.+]](i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL00n:@.+]]() #pragma omp target map(alloc:b) #pragma omp parallel { +#ifdef USE ++b; +#endif } // Map of an array. 
@@ -1553,10 +1595,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store [100 x i32]* [[VAR0:%.+]], [100 x i32]** [[CBP0]] // CK19-DAG: store [100 x i32]* [[VAR0]], [100 x i32]** [[CP0]] - // CK19: call void [[CALL01:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-USE: call void [[CALL01:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL01:@.+]]() #pragma omp target map(to:arra) { +#ifdef USE arra[50]++; +#endif } // Region 02 @@ -1572,10 +1617,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store i32* [[SEC0:%[^,]+]], i32** [[CP0]] // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} 20 - // CK19: call void [[CALL02:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-USE: call void [[CALL02:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL02:@.+]]() #pragma omp target map(from:arra[20:60]) { +#ifdef USE arra[50]++; +#endif } // Region 03 @@ -1591,10 +1639,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store i32* [[SEC0:%[^,]+]], i32** [[CP0]] // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} 0 - // CK19: call void [[CALL03:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-USE: call void [[CALL03:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL03:@.+]]() #pragma omp target map(tofrom:arra[:60]) { +#ifdef USE arra[50]++; +#endif } // Region 04 @@ -1610,10 +1661,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store i32* [[SEC0:%[^,]+]], i32** [[CP0]] // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} 0 - // CK19: call void [[CALL04:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-USE: call void [[CALL04:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL04:@.+]]() #pragma omp target map(alloc:arra[:]) { +#ifdef USE arra[50]++; +#endif } // Region 05 @@ -1629,10 +1683,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store i32* [[SEC0:%[^,]+]], i32** [[CP0]] // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* 
[[VAR0]], i{{.+}} 0, i{{.+}} 15 - // CK19: call void [[CALL05:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-USE: call void [[CALL05:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL05:@.+]]() #pragma omp target map(to:arra[15]) { +#ifdef USE arra[15]++; +#endif } // Region 06 @@ -1652,10 +1709,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[CSVAL0]] = {{mul nuw i.+ %.*, 4|sext i32 .+ to i64}} // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} %{{.*}} - // CK19: call void [[CALL06:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-USE: call void [[CALL06:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL06:@.+]]() #pragma omp target map(tofrom:arra[ii:ii+23]) { +#ifdef USE arra[50]++; +#endif } // Region 07 @@ -1675,10 +1735,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[CSVAL0]] = {{mul nuw i.+ %.*, 4|sext i32 .+ to i64}} // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} 0 - // CK19: call void [[CALL07:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-USE: call void [[CALL07:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL07:@.+]]() #pragma omp target map(alloc:arra[:ii]) { +#ifdef USE arra[50]++; +#endif } // Region 08 @@ -1694,10 +1757,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store i32* [[SEC0:%[^,]+]], i32** [[CP0]] // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} %{{.*}} - // CK19: call void [[CALL08:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-USE: call void [[CALL08:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL08:@.+]]() #pragma omp target map(tofrom:arra[ii]) { +#ifdef USE arra[15]++; +#endif } // Map of a pointer. 
@@ -1715,10 +1781,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store i32** [[VAR0:%.+]], i32*** [[CBP0]] // CK19-DAG: store i32** [[VAR0]], i32*** [[CP0]] - // CK19: call void [[CALL09:@.+]](i32** {{[^,]+}}) + // CK19-USE: call void [[CALL09:@.+]](i32** {{[^,]+}}) + // CK19-NOUSE: call void [[CALL09:@.+]]() #pragma omp target map(from:pa) { +#ifdef USE pa[50]++; +#endif } // Region 10 @@ -1736,10 +1805,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 20 // CK19-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]] - // CK19: call void [[CALL10:@.+]](i32* {{[^,]+}}) + // CK19-USE: call void [[CALL10:@.+]](i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL10:@.+]]() #pragma omp target map(tofrom:pa[20:60]) { +#ifdef USE pa[50]++; +#endif } // Region 11 @@ -1757,10 +1829,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 0 // CK19-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]] - // CK19: call void [[CALL11:@.+]](i32* {{[^,]+}}) + // CK19-USE: call void [[CALL11:@.+]](i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL11:@.+]]() #pragma omp target map(alloc:pa[:60]) { +#ifdef USE pa[50]++; +#endif } // Region 12 @@ -1778,10 +1853,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 15 // CK19-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]] - // CK19: call void [[CALL12:@.+]](i32* {{[^,]+}}) + // CK19-USE: call void [[CALL12:@.+]](i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL12:@.+]]() #pragma omp target map(to:pa[15]) { +#ifdef USE pa[15]++; +#endif } // Region 13 @@ -1803,10 +1881,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} %{{.*}} // CK19-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]] - // CK19: call void [[CALL13:@.+]](i32* {{[^,]+}}) + // CK19-USE: call void [[CALL13:@.+]](i32* {{[^,]+}}) + // CK19-NOUSE: call void 
[[CALL13:@.+]]() #pragma omp target map(alloc:pa[ii-23:ii]) { +#ifdef USE pa[50]++; +#endif } // Region 14 @@ -1828,10 +1909,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 0 // CK19-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]] - // CK19: call void [[CALL14:@.+]](i32* {{[^,]+}}) + // CK19-USE: call void [[CALL14:@.+]](i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL14:@.+]]() #pragma omp target map(to:pa[:ii]) { +#ifdef USE pa[50]++; +#endif } // Region 15 @@ -1849,212 +1933,300 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} %{{.*}} // CK19-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]] - // CK19: call void [[CALL15:@.+]](i32* {{[^,]+}}) + // CK19-USE: call void [[CALL15:@.+]](i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL15:@.+]]() #pragma omp target map(from:pa[ii+12]) { +#ifdef USE pa[15]++; +#endif } // Map of a variable-size array. int va[ii]; // Region 16 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE16]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE16]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z:64|32]]* - // CK19-DAG: [[CP0:%.+]] = 
bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] - // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] - // CK19-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]] - - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** - // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] - // CK19-DAG: store i32* [[VAR1]], i32** [[CP1]] - // CK19-DAG: store i{{.+}} [[CSVAL1:%[^,]+]], i{{.+}}* [[S1]] - // CK19-DAG: [[CSVAL1]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}} - - // CK19: call void [[CALL16:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z:64|32]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] + // CK19-USE-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]] + + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** + // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] + // CK19-USE-DAG: store i32* [[VAR1]], i32** [[CP1]] + // CK19-USE-DAG: store 
i{{.+}} [[CSVAL1:%[^,]+]], i{{.+}}* [[S1]] + // CK19-USE-DAG: [[CSVAL1]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}} + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32** + // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** + // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]] + // CK19-NOUSE-DAG: store i32* [[VAR0]], i32** [[CP0]] + // CK19-NOUSE-DAG: store i{{.+}} [[CSVAL0:%[^,]+]], i{{.+}}* [[S0]] + // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}} + + // CK19-USE: call void [[CALL16:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL16:@.+]]() #pragma omp target map(to:va) { +#ifdef USE va[50]++; +#endif } // Region 17 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE17]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE17]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[SIZE17]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE17]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] - // CK19-DAG: store 
i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] - - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** - // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] - // CK19-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] - // CK19-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 20 - - // CK19: call void [[CALL17:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] + + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** + // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] + // CK19-USE-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] + // CK19-USE-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 20 + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32** + // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** + // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]] + // CK19-NOUSE-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]] + // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}i32* 
[[VAR0]], i{{.+}} 20 + + // CK19-USE: call void [[CALL17:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL17:@.+]]() #pragma omp target map(from:va[20:60]) { +#ifdef USE va[50]++; +#endif } // Region 18 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE18]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE18]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[SIZE18]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE18]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] - // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] - - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** - // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] - // CK19-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] - // CK19-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 0 - - // CK19: call void [[CALL18:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + 
// CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] + + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** + // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] + // CK19-USE-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] + // CK19-USE-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 0 + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32** + // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** + // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]] + // CK19-NOUSE-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]] + // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[VAR0]], i{{.+}} 0 + + // CK19-USE: call void [[CALL18:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL18:@.+]]() #pragma omp target map(tofrom:va[:60]) { +#ifdef USE va[50]++; +#endif } // Region 19 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE19]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE19]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = 
getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] - // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] - // CK19-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]] - - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** - // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] - // CK19-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] - // CK19-DAG: store i{{.+}} [[CSVAL1:%[^,]+]], i{{.+}}* [[S1]] - // CK19-DAG: [[CSVAL1]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}} - // CK19-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 0 - - // CK19: call void [[CALL19:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] + // CK19-USE-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]] + + // 
CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** + // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] + // CK19-USE-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] + // CK19-USE-DAG: store i{{.+}} [[CSVAL1:%[^,]+]], i{{.+}}* [[S1]] + // CK19-USE-DAG: [[CSVAL1]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}} + // CK19-USE-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 0 + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32** + // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** + // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]] + // CK19-NOUSE-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]] + // CK19-NOUSE-DAG: store i{{.+}} [[CSVAL0:%[^,]+]], i{{.+}}* [[S0]] + // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}} + // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[VAR0]], i{{.+}} 0 + + // CK19-USE: call void [[CALL19:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL19:@.+]]() #pragma omp target map(alloc:va[:]) { +#ifdef USE va[50]++; +#endif } // Region 20 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE20]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE20]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, 
i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[SIZE20]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE20]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] - // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] - - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** - // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] - // CK19-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] - // CK19-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 15 - - // CK19: call void [[CALL20:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] + + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to 
i32** + // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] + // CK19-USE-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] + // CK19-USE-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} 15 + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32** + // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** + // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]] + // CK19-NOUSE-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]] + // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[VAR0]], i{{.+}} 15 + + // CK19-USE: call void [[CALL20:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL20:@.+]]() #pragma omp target map(to:va[15]) { +#ifdef USE va[15]++; +#endif } // Region 21 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE21]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE21]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] - // CK19-DAG: 
store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] - // CK19-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]] - - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** - // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] - // CK19-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] - // CK19-DAG: store i{{.+}} [[CSVAL1:%[^,]+]], i{{.+}}* [[S1]] - // CK19-DAG: [[CSVAL1]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}} - // CK19-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} %{{.+}} - - // CK19: call void [[CALL21:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] + // CK19-USE-DAG: store i{{.+}} {{8|4}}, i{{.+}}* [[S0]] + + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** + // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] + // CK19-USE-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] + // CK19-USE-DAG: store i{{.+}} [[CSVAL1:%[^,]+]], 
i{{.+}}* [[S1]] + // CK19-USE-DAG: [[CSVAL1]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}} + // CK19-USE-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} %{{.+}} + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32** + // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** + // CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]] + // CK19-NOUSE-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]] + // CK19-NOUSE-DAG: store i{{.+}} [[CSVAL0:%[^,]+]], i{{.+}}* [[S0]] + // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %.*, 4|sext i32 .+ to i64}} + // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[VAR0]], i{{.+}} %{{.+}} + + // CK19-USE: call void [[CALL21:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL21:@.+]]() #pragma omp target map(tofrom:va[ii:ii+23]) { +#ifdef USE va[50]++; +#endif } // Region 22 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 2, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[SIZE22]], {{.+}}getelementptr {{.+}}[2 x i{{.+}}]* [[MTYPE22]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|2}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[SIZE22]], {{.+}}getelementptr {{.+}}[{{1|2}} x i{{.+}}]* [[MTYPE22]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** 
[[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] - // CK19-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] - - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** - // CK19-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] - // CK19-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] - // CK19-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} %{{.+}} - - // CK19: call void [[CALL22:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] {{%.+}}, i[[Z]]* [[CP0]] + + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i32** + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i32** + // CK19-USE-DAG: store i32* [[VAR1:%.+]], i32** [[CBP1]] + // CK19-USE-DAG: store i32* [[SEC1:%.+]], i32** [[CP1]] + // CK19-USE-DAG: [[SEC1]] = getelementptr {{.*}}i32* [[VAR1]], i{{.+}} %{{.+}} + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i32** + // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i32** + // 
CK19-NOUSE-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]] + // CK19-NOUSE-DAG: store i32* [[SEC0:%.+]], i32** [[CP0]] + // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[VAR0]], i{{.+}} %{{.+}} + + // CK19-USE: call void [[CALL22:@.+]](i{{.+}} {{[^,]+}}, i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL22:@.+]]() #pragma omp target map(tofrom:va[ii]) { +#ifdef USE va[15]++; +#endif } // Always. @@ -2070,10 +2242,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]] // CK19-DAG: store i32* [[VAR0]], i32** [[CP0]] - // CK19: call void [[CALL23:@.+]](i32* {{[^,]+}}) + // CK19-USE: call void [[CALL23:@.+]](i32* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL23:@.+]]() #pragma omp target map(always, tofrom: a) { +#ifdef USE a++; +#endif } // Multidimensional arrays. @@ -2092,10 +2267,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store [4 x [5 x [6 x i32]]]* [[VAR0:%.+]], [4 x [5 x [6 x i32]]]** [[CBP0]] // CK19-DAG: store [4 x [5 x [6 x i32]]]* [[VAR0]], [4 x [5 x [6 x i32]]]** [[CP0]] - // CK19: call void [[CALL24:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}}) + // CK19-USE: call void [[CALL24:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL24:@.+]]() #pragma omp target map(tofrom: marr) { +#ifdef USE marr[1][2][3]++; +#endif } // Region 25 @@ -2113,10 +2291,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC00]] = getelementptr {{.*}}[5 x [6 x i32]]* [[SEC000:[^,]+]], i{{.+}} 0, i{{.+}} 2 // CK19-DAG: [[SEC000]] = getelementptr {{.*}}[4 x [5 x [6 x i32]]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - // CK19: call void [[CALL25:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}}) + // CK19-USE: call void [[CALL25:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL25:@.+]]() #pragma omp target map(tofrom: marr[1][2][2:4]) { +#ifdef USE marr[1][2][3]++; +#endif } // Region 26 @@ -2134,10 +2315,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC00]] = getelementptr {{.*}}[5 x [6 x i32]]* 
[[SEC000:[^,]+]], i{{.+}} 0, i{{.+}} 2 // CK19-DAG: [[SEC000]] = getelementptr {{.*}}[4 x [5 x [6 x i32]]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - // CK19: call void [[CALL26:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}}) + // CK19-USE: call void [[CALL26:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL26:@.+]]() #pragma omp target map(tofrom: marr[1][2][:]) { +#ifdef USE marr[1][2][3]++; +#endif } // Region 27 @@ -2155,10 +2339,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC00]] = getelementptr {{.*}}[5 x [6 x i32]]* [[SEC000:[^,]+]], i{{.+}} 0, i{{.+}} 2 // CK19-DAG: [[SEC000]] = getelementptr {{.*}}[4 x [5 x [6 x i32]]]* [[VAR0]], i{{.+}} 0, i{{.+}} 1 - // CK19: call void [[CALL27:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}}) + // CK19-USE: call void [[CALL27:@.+]]([4 x [5 x [6 x i32]]]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL27:@.+]]() #pragma omp target map(tofrom: marr[1][2][3]) { +#ifdef USE marr[1][2][3]++; +#endif } // Region 28 @@ -2200,10 +2387,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC22222]] = getelementptr {{.*}}i32*** [[SEC222222:[^,]+]], i{{.+}} 1 // CK19-DAG: [[SEC222222]] = load i32***, i32**** [[PTR]], - // CK19: call void [[CALL28:@.+]](i32*** {{[^,]+}}) + // CK19-USE: call void [[CALL28:@.+]](i32*** {{[^,]+}}) + // CK19-NOUSE: call void [[CALL28:@.+]]() #pragma omp target map(tofrom: mptr[1][2][2:4]) { +#ifdef USE mptr[1][2][3]++; +#endif } // Region 29 @@ -2245,110 +2435,141 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC22222]] = getelementptr {{.*}}i32*** [[SEC222222:[^,]+]], i{{.+}} 1 // CK19-DAG: [[SEC222222]] = load i32***, i32**** [[PTR]], - // CK19: call void [[CALL29:@.+]](i32*** {{[^,]+}}) + // CK19-USE: call void [[CALL29:@.+]](i32*** {{[^,]+}}) + // CK19-NOUSE: call void [[CALL29:@.+]]() #pragma omp target map(tofrom: mptr[1][2][3]) { +#ifdef USE mptr[1][2][3]++; +#endif } // Multidimensional VLA. 
double mva[23][ii][ii+5]; // Region 30 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE30]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|4}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|4}} x i{{.+}}]* [[MTYPE30]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] // - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] 23, i[[Z]]* [[CBP0]] - // CK19-DAG: store i[[Z]] 23, i[[Z]]* [[CP0]] - // CK19-DAG: store i64 {{8|4}}, i64* [[S0]] + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] 23, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] 23, i[[Z]]* [[CP0]] + // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]] // - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 
0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* - // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] - // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] - // CK19-DAG: store i64 {{8|4}}, i64* [[S1]] - // CK19-64-DAG: [[VAR1]] = zext i32 %{{[^,]+}} to i64 - // CK19-64-DAG: [[VAR11]] = zext i32 %{{[^,]+}} to i64 + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] + // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] + // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]] + // CK19-64-USE-DAG: [[VAR1]] = zext i32 %{{[^,]+}} to i64 + // CK19-64-USE-DAG: [[VAR11]] = zext i32 %{{[^,]+}} to i64 // - // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to i[[Z]]* - // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i[[Z]]* - // CK19-DAG: store i[[Z]] [[VAR2:%.+]], i[[Z]]* [[CBP2]] - // CK19-DAG: store i[[Z]] [[VAR22:%.+]], i[[Z]]* [[CP2]] - // CK19-DAG: store i64 {{8|4}}, i64* [[S2]] - // CK19-64-DAG: [[VAR2]] = zext i32 %{{[^,]+}} to i64 - // CK19-64-DAG: [[VAR22]] = zext i32 %{{[^,]+}} to i64 + // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[S2:%.+]] = getelementptr 
inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to i[[Z]]* + // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] [[VAR2:%.+]], i[[Z]]* [[CBP2]] + // CK19-USE-DAG: store i[[Z]] [[VAR22:%.+]], i[[Z]]* [[CP2]] + // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S2]] + // CK19-64-USE-DAG: [[VAR2]] = zext i32 %{{[^,]+}} to i64 + // CK19-64-USE-DAG: [[VAR22]] = zext i32 %{{[^,]+}} to i64 // - // CK19-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3 - // CK19-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3 - // CK19-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 3 - // CK19-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to double** - // CK19-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to double** - // CK19-DAG: store double* [[VAR3:%.+]], double** [[CBP3]] - // CK19-DAG: store double* [[VAR3]], double** [[CP3]] - // CK19-DAG: store i64 [[CSVAL3:%[^,]+]], i64* [[S3]] - // CK19-DAG: [[CSVAL3]] = {{mul nuw i64 %[^,]+, 8|sext i32 .+ to i64}} - - // CK19: call void [[CALL30:@.+]](i[[Z]] 23, i[[Z]] %{{[^,]+}}, i[[Z]] %{{[^,]+}}, double* %{{[^,]+}}) + // CK19-USE-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3 + // CK19-USE-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3 + // CK19-USE-DAG: [[S3:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 3 + // CK19-USE-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to double** + // CK19-USE-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to double** + // CK19-USE-DAG: store double* [[VAR3:%.+]], double** [[CBP3]] + // CK19-USE-DAG: store double* [[VAR3]], double** [[CP3]] + // CK19-USE-DAG: store i64 [[CSVAL3:%[^,]+]], i64* [[S3]] + // CK19-USE-DAG: [[CSVAL3]] = {{mul nuw i64 %[^,]+, 8|sext i32 .+ to i64}} + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = 
getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to double** + // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to double** + // CK19-NOUSE-DAG: store double* [[VAR0:%.+]], double** [[CBP0]] + // CK19-NOUSE-DAG: store double* [[VAR0]], double** [[CP0]] + // CK19-NOUSE-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[S0]] + // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %[^,]+, 8|sext i32 .+ to i64}} + + // CK19-USE: call void [[CALL30:@.+]](i[[Z]] 23, i[[Z]] %{{[^,]+}}, i[[Z]] %{{[^,]+}}, double* %{{[^,]+}}) + // CK19-NOUSE: call void [[CALL30:@.+]]() #pragma omp target map(tofrom: mva) { +#ifdef USE mva[1][2][3]++; +#endif } // Region 31 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 4, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[SIZE31]], {{.+}}getelementptr {{.+}}[4 x i{{.+}}]* [[MTYPE31]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|4}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[{{1|4}} x i{{.+}}]* [[SIZE31]], {{.+}}getelementptr {{.+}}[{{1|4}} x i{{.+}}]* [[MTYPE31]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] 23, i[[Z]]* [[CBP0]] - // CK19-DAG: store i[[Z]] 23, i[[Z]]* [[CP0]] + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], 
i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] 23, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] 23, i[[Z]]* [[CP0]] // - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* - // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] - // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] + // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] // - // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to i[[Z]]* - // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i[[Z]]* - // CK19-DAG: store i[[Z]] [[VAR2:%.+]], i[[Z]]* [[CBP2]] - // CK19-DAG: store i[[Z]] [[VAR22:%.+]], i[[Z]]* [[CP2]] + // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to i[[Z]]* + // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] [[VAR2:%.+]], i[[Z]]* [[CBP2]] + // CK19-USE-DAG: store i[[Z]] [[VAR22:%.+]], i[[Z]]* [[CP2]] // - // CK19-DAG: [[BP3:%.+]] = 
getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3 - // CK19-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3 - // CK19-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to double** - // CK19-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to double** - // CK19-DAG: store double* [[VAR3:%.+]], double** [[CBP3]] - // CK19-DAG: store double* [[SEC3:%.+]], double** [[CP3]] - // CK19-DAG: [[SEC3]] = getelementptr {{.*}}double* [[SEC33:%.+]], i[[Z]] 0 - // CK19-DAG: [[SEC33]] = getelementptr {{.*}}double* [[SEC333:%.+]], i[[Z]] [[IDX3:%.+]] - // CK19-DAG: [[IDX3]] = mul nsw i[[Z]] %{{[^,]+}}, %{{[^,]+}} - // CK19-DAG: [[SEC333]] = getelementptr {{.*}}double* [[VAR3]], i[[Z]] [[IDX33:%.+]] - // CK19-DAG: [[IDX33]] = mul nsw i[[Z]] 1, %{{[^,]+}} - - // CK19: call void [[CALL31:@.+]](i[[Z]] 23, i[[Z]] %{{[^,]+}}, i[[Z]] %{{[^,]+}}, double* %{{[^,]+}}) + // CK19-USE-DAG: [[BP3:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 3 + // CK19-USE-DAG: [[P3:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 3 + // CK19-USE-DAG: [[CBP3:%.+]] = bitcast i8** [[BP3]] to double** + // CK19-USE-DAG: [[CP3:%.+]] = bitcast i8** [[P3]] to double** + // CK19-USE-DAG: store double* [[VAR3:%.+]], double** [[CBP3]] + // CK19-USE-DAG: store double* [[SEC3:%.+]], double** [[CP3]] + // CK19-USE-DAG: [[SEC3]] = getelementptr {{.*}}double* [[SEC33:%.+]], i[[Z]] 0 + // CK19-USE-DAG: [[SEC33]] = getelementptr {{.*}}double* [[SEC333:%.+]], i[[Z]] [[IDX3:%.+]] + // CK19-USE-DAG: [[IDX3]] = mul nsw i[[Z]] %{{[^,]+}}, %{{[^,]+}} + // CK19-USE-DAG: [[SEC333]] = getelementptr {{.*}}double* [[VAR3]], i[[Z]] [[IDX33:%.+]] + // CK19-USE-DAG: [[IDX33]] = mul nsw i[[Z]] 1, %{{[^,]+}} + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to double** + // CK19-NOUSE-DAG: [[CP0:%.+]] = 
bitcast i8** [[P0]] to double** + // CK19-NOUSE-DAG: store double* [[VAR0:%.+]], double** [[CBP0]] + // CK19-NOUSE-DAG: store double* [[SEC0:%.+]], double** [[CP0]] + // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.*}}double* [[SEC00:%.+]], i[[Z:64|32]] 0 + // CK19-NOUSE-DAG: [[SEC00]] = getelementptr {{.*}}double* [[SEC000:%.+]], i[[Z]] [[IDX0:%.+]] + // CK19-NOUSE-DAG: [[IDX0]] = mul nsw i[[Z]] %{{[^,]+}}, %{{[^,]+}} + // CK19-NOUSE-DAG: [[SEC000]] = getelementptr {{.*}}double* [[VAR0]], i[[Z]] [[IDX00:%.+]] + // CK19-NOUSE-DAG: [[IDX00]] = mul nsw i[[Z]] 1, %{{[^,]+}} + + // CK19-USE: call void [[CALL31:@.+]](i[[Z]] 23, i[[Z]] %{{[^,]+}}, i[[Z]] %{{[^,]+}}, double* %{{[^,]+}}) + // CK19-NOUSE: call void [[CALL31:@.+]]() #pragma omp target map(tofrom: mva[1][ii-2][:5]) { +#ifdef USE mva[1][2][3]++; +#endif } // Multidimensional array sections. @@ -2368,10 +2589,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store [11 x [12 x [13 x double]]]* [[VAR0:%.+]], [11 x [12 x [13 x double]]]** [[CBP0]] // CK19-DAG: store [11 x [12 x [13 x double]]]* [[VAR0]], [11 x [12 x [13 x double]]]** [[CP0]] - // CK19: call void [[CALL32:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}}) + // CK19-USE: call void [[CALL32:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL32:@.+]]() #pragma omp target map(marras) { +#ifdef USE marras[1][2][3]++; +#endif } // Region 33 @@ -2387,10 +2611,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store [12 x [13 x double]]* [[SEC0:%.+]], [12 x [13 x double]]** [[CP0]] // CK19-DAG: [[SEC0]] = getelementptr {{.+}}[11 x [12 x [13 x double]]]* [[VAR0]], i[[Z]] 0, i[[Z]] 0 - // CK19: call void [[CALL33:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}}) + // CK19-USE: call void [[CALL33:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL33:@.+]]() #pragma omp target map(marras[:]) { +#ifdef USE marras[1][2][3]++; +#endif } // Region 34 @@ -2406,10 +2633,13 @@ void explicit_maps_single (int ii){ 
// CK19-DAG: store [12 x [13 x double]]* [[SEC0:%.+]], [12 x [13 x double]]** [[CP0]] // CK19-DAG: [[SEC0]] = getelementptr {{.+}}[11 x [12 x [13 x double]]]* [[VAR0]], i[[Z]] 0, i[[Z]] 0 - // CK19: call void [[CALL34:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}}) + // CK19-USE: call void [[CALL34:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL34:@.+]]() #pragma omp target map(marras[:][:][:]) { +#ifdef USE marras[1][2][3]++; +#endif } // Region 35 @@ -2431,10 +2661,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC00]] = getelementptr {{.+}}[11 x [12 x [13 x double]]]* [[VAR0]], i[[Z]] 0, i[[Z]] 1 // CK19-DAG: [[CSVAL0]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}} - // CK19: call void [[CALL35:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}}) + // CK19-USE: call void [[CALL35:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL35:@.+]]() #pragma omp target map(marras[1][:ii][:]) { +#ifdef USE marras[1][2][3]++; +#endif } // Region 36 @@ -2452,211 +2685,285 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC00]] = getelementptr {{.+}}[12 x [13 x double]]* [[SEC000:%[^,]+]], i{{.+}} 0, i{{.+}} 0 // CK19-DAG: [[SEC000]] = getelementptr {{.+}}[11 x [12 x [13 x double]]]* [[VAR0]], i{{.+}} 0, i{{.+}} 0 - // CK19: call void [[CALL36:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}}) + // CK19-USE: call void [[CALL36:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL36:@.+]]() #pragma omp target map(marras[:1][:2][:13]) { +#ifdef USE marras[1][2][3]++; +#endif } // Region 37 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE37]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|3}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|3}} x i{{.+}}]* 
[[MTYPE37]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] // - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]] - // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]] - // CK19-DAG: store i64 {{8|4}}, i64* [[S0]] + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]] + // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]] // - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* - // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] - // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] - // CK19-DAG: store i64 {{8|4}}, i64* [[S1]] + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr 
inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] + // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] + // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]] // - // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]** - // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]** - // CK19-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]] - // CK19-DAG: store [13 x double]* [[VAR2]], [13 x double]** [[CP2]] - // CK19-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]] - // CK19-DAG: [[CSVAL2]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}} - - // CK19: call void [[CALL37:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}}) + // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]** + // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]** + // CK19-USE-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]] + // CK19-USE-DAG: store [13 x double]* [[VAR2]], [13 x double]** [[CP2]] + // CK19-USE-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]] + // CK19-USE-DAG: [[CSVAL2]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}} + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, 
i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [13 x double]** + // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [13 x double]** + // CK19-NOUSE-DAG: store [13 x double]* [[VAR0:%.+]], [13 x double]** [[CBP0]] + // CK19-NOUSE-DAG: store [13 x double]* [[VAR0]], [13 x double]** [[CP0]] + // CK19-NOUSE-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[S0]] + // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}} + + // CK19-USE: call void [[CALL37:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}}) + // CK19-NOUSE: call void [[CALL37:@.+]]() #pragma omp target map(mvlaas) { +#ifdef USE mvlaas[1][2][3]++; +#endif } // Region 38 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE38]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|3}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|3}} x i{{.+}}]* [[MTYPE38]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] // - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]] - // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]] - // CK19-DAG: 
store i64 {{8|4}}, i64* [[S0]] + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]] + // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]] // - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* - // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] - // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] - // CK19-DAG: store i64 {{8|4}}, i64* [[S1]] + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] + // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] + // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]] // - // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[CBP2:%.+]] = 
bitcast i8** [[BP2]] to [13 x double]** - // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]** - // CK19-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]] - // CK19-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]] - // CK19-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]] - // CK19-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC22:%[^,]+]] - // CK19-DAG: [[SEC22]] = mul nsw i[[Z]] 0, %{{[^,]+}} - // CK19-DAG: [[CSVAL2]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}} - - // CK19: call void [[CALL38:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}}) + // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]** + // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]** + // CK19-USE-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]] + // CK19-USE-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]] + // CK19-USE-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]] + // CK19-USE-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC22:%[^,]+]] + // CK19-USE-DAG: [[SEC22]] = mul nsw i[[Z]] 0, %{{[^,]+}} + // CK19-USE-DAG: [[CSVAL2]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}} + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [13 x double]** + // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [13 x double]** + // CK19-NOUSE-DAG: store [13 x double]* [[VAR0:%.+]], [13 x 
double]** [[CBP0]] + // CK19-NOUSE-DAG: store [13 x double]* [[SEC0:%.+]], [13 x double]** [[CP0]] + // CK19-NOUSE-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[S0]] + // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.+}}[13 x double]* [[VAR0]], i[[Z]] [[SEC00:%[^,]+]] + // CK19-NOUSE-DAG: [[SEC00]] = mul nsw i[[Z]] 0, %{{[^,]+}} + // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}} + + // CK19-USE: call void [[CALL38:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}}) + // CK19-NOUSE: call void [[CALL38:@.+]]() #pragma omp target map(mvlaas[:]) { +#ifdef USE mvlaas[1][2][3]++; +#endif } // Region 39 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE39]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|3}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|3}} x i{{.+}}]* [[MTYPE39]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] // - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]] - // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]] - // CK19-DAG: store i64 {{8|4}}, i64* [[S0]] + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: 
[[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]] + // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]] // - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* - // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] - // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] - // CK19-DAG: store i64 {{8|4}}, i64* [[S1]] + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] + // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] + // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]] // - // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]** - // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]** - // CK19-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]] - // CK19-DAG: store [13 x double]* [[SEC2:%.+]], 
[13 x double]** [[CP2]] - // CK19-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]] - // CK19-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC22:%[^,]+]] - // CK19-DAG: [[SEC22]] = mul nsw i[[Z]] 0, %{{[^,]+}} - // CK19-DAG: [[CSVAL2]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}} - - // CK19: call void [[CALL39:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}}) + // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]** + // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]** + // CK19-USE-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]] + // CK19-USE-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]] + // CK19-USE-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]] + // CK19-USE-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC22:%[^,]+]] + // CK19-USE-DAG: [[SEC22]] = mul nsw i[[Z]] 0, %{{[^,]+}} + // CK19-USE-DAG: [[CSVAL2]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}} + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [13 x double]** + // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [13 x double]** + // CK19-NOUSE-DAG: store [13 x double]* [[VAR0:%.+]], [13 x double]** [[CBP0]] + // CK19-NOUSE-DAG: store [13 x double]* [[SEC0:%.+]], [13 x double]** [[CP0]] + // CK19-NOUSE-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[S0]] + // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.+}}[13 x double]* [[VAR0]], 
i[[Z]] [[SEC00:%[^,]+]] + // CK19-NOUSE-DAG: [[SEC00]] = mul nsw i[[Z]] 0, %{{[^,]+}} + // CK19-NOUSE-DAG: [[CSVAL0]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}} + + // CK19-USE: call void [[CALL39:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}}) + // CK19-NOUSE: call void [[CALL39:@.+]]() #pragma omp target map(mvlaas[:][:][:]) { +#ifdef USE mvlaas[1][2][3]++; +#endif } // Region 40 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE40]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|3}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], i64* [[GEPS:%.+]], {{.+}}getelementptr {{.+}}[{{1|3}} x i{{.+}}]* [[MTYPE40]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // CK19-DAG: [[GEPS]] = getelementptr inbounds {{.+}}[[S:%[^,]+]] // - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]] - // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]] - // CK19-DAG: store i64 {{8|4}}, i64* [[S0]] + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] 11, 
i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]] + // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S0]] // - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* - // CK19-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] - // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] - // CK19-DAG: store i64 {{8|4}}, i64* [[S1]] + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[S1:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] + // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] + // CK19-USE-DAG: store i64 {{8|4}}, i64* [[S1]] // - // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]** - // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]** - // CK19-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]] - // CK19-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]] - // CK19-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]] - // CK19-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[SEC22:%[^,]+]], i[[Z]] 0 - // CK19-DAG: [[SEC22]] = getelementptr {{.+}}[13 x double]* 
[[VAR2]], i[[Z]] [[SEC222:%[^,]+]] - // CK19-DAG: [[SEC222]] = mul nsw i[[Z]] 1, %{{[^,]+}} - - // CK19: call void [[CALL40:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}}) + // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[S2:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]** + // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]** + // CK19-USE-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]] + // CK19-USE-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]] + // CK19-USE-DAG: store i64 [[CSVAL2:%[^,]+]], i64* [[S2]] + // CK19-USE-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[SEC22:%[^,]+]], i[[Z]] 0 + // CK19-USE-DAG: [[SEC22]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC222:%[^,]+]] + // CK19-USE-DAG: [[SEC222]] = mul nsw i[[Z]] 1, %{{[^,]+}} + + // CK19-NOUSE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[S0:%.+]] = getelementptr inbounds {{.+}}[[S]], i{{.+}} 0, i{{.+}} 0 + // CK19-NOUSE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [13 x double]** + // CK19-NOUSE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [13 x double]** + // CK19-NOUSE-DAG: store [13 x double]* [[VAR0:%.+]], [13 x double]** [[CBP0]] + // CK19-NOUSE-DAG: store [13 x double]* [[SEC0:%.+]], [13 x double]** [[CP0]] + // CK19-NOUSE-DAG: store i64 [[CSVAL0:%[^,]+]], i64* [[S0]] + // CK19-NOUSE-DAG: [[SEC0]] = getelementptr {{.+}}[13 x double]* [[SEC00:%[^,]+]], i[[Z]] 0 + // CK19-NOUSE-DAG: [[SEC00]] = getelementptr {{.+}}[13 x double]* [[VAR0]], i[[Z]] [[SEC000:%[^,]+]] + // CK19-NOUSE-DAG: [[SEC000]] = mul nsw i[[Z]] 1, %{{[^,]+}} + + // CK19-USE: 
call void [[CALL40:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}}) + // CK19-NOUSE: call void [[CALL40:@.+]]() #pragma omp target map(mvlaas[1][:ii][:]) { +#ifdef USE mvlaas[1][2][3]++; +#endif } // Region 41 - // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 3, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[SIZE41]], {{.+}}getelementptr {{.+}}[3 x i{{.+}}]* [[MTYPE41]]{{.+}}, i8** null) + // CK19-DAG: call i32 @__tgt_target_mapper(i64 {{[^,]+}}, i8* {{[^,]+}}, i32 {{1|3}}, i8** [[GEPBP:%.+]], i8** [[GEPP:%.+]], {{.+}}getelementptr {{.+}}[{{1|3}} x i{{.+}}]* [[SIZE41]], {{.+}}getelementptr {{.+}}[{{1|3}} x i{{.+}}]* [[MTYPE41]]{{.+}}, i8** null) // CK19-DAG: [[GEPBP]] = getelementptr inbounds {{.+}}[[BP:%[^,]+]] // CK19-DAG: [[GEPP]] = getelementptr inbounds {{.+}}[[P:%[^,]+]] // - // CK19-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 - // CK19-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* - // CK19-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* - // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]] - // CK19-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]] + // CK19-USE-DAG: [[BP0:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to i[[Z]]* + // CK19-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CBP0]] + // CK19-USE-DAG: store i[[Z]] 11, i[[Z]]* [[CP0]] // - // CK19-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 - // CK19-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* - // CK19-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* - // CK19-DAG: store i[[Z]] 
[[VAR1:%.+]], i[[Z]]* [[CBP1]] - // CK19-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] + // CK19-USE-DAG: [[BP1:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[P1:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 1 + // CK19-USE-DAG: [[CBP1:%.+]] = bitcast i8** [[BP1]] to i[[Z]]* + // CK19-USE-DAG: [[CP1:%.+]] = bitcast i8** [[P1]] to i[[Z]]* + // CK19-USE-DAG: store i[[Z]] [[VAR1:%.+]], i[[Z]]* [[CBP1]] + // CK19-USE-DAG: store i[[Z]] [[VAR11:%.+]], i[[Z]]* [[CP1]] // - // CK19-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 - // CK19-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]** - // CK19-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]** - // CK19-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]] - // CK19-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]] - // CK19-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[SEC22:%[^,]+]], i[[Z]] 0 - // CK19-DAG: [[SEC22]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] [[SEC222:%[^,]+]] - // CK19-DAG: [[SEC222]] = mul nsw i[[Z]] 0, %{{[^,]+}} - - // CK19: call void [[CALL41:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}}) + // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[P2:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 2 + // CK19-USE-DAG: [[CBP2:%.+]] = bitcast i8** [[BP2]] to [13 x double]** + // CK19-USE-DAG: [[CP2:%.+]] = bitcast i8** [[P2]] to [13 x double]** + // CK19-USE-DAG: store [13 x double]* [[VAR2:%.+]], [13 x double]** [[CBP2]] + // CK19-USE-DAG: store [13 x double]* [[SEC2:%.+]], [13 x double]** [[CP2]] + // CK19-USE-DAG: [[SEC2]] = getelementptr {{.+}}[13 x double]* [[SEC22:%[^,]+]], i[[Z]] 0 + // CK19-USE-DAG: [[SEC22]] = getelementptr {{.+}}[13 x double]* [[VAR2]], i[[Z]] 
[[SEC222:%[^,]+]] + // CK19-USE-DAG: [[SEC222]] = mul nsw i[[Z]] 0, %{{[^,]+}} + // CK19-USE-DAG: [[BP2:%.+]] = getelementptr inbounds {{.+}}[[BP]], i{{.+}} 0, i{{.+}} 2 + + // CK19-NO-USE-DAG: [[P0:%.+]] = getelementptr inbounds {{.+}}[[P]], i{{.+}} 0, i{{.+}} 0 + // CK19-NO-USE-DAG: [[CBP0:%.+]] = bitcast i8** [[BP0]] to [13 x double]** + // CK19-NO-USE-DAG: [[CP0:%.+]] = bitcast i8** [[P0]] to [13 x double]** + // CK19-NO-USE-DAG: store [13 x double]* [[VAR0:%.+]], [13 x double]** [[CBP0]] + // CK19-NO-USE-DAG: store [13 x double]* [[SEC0:%.+]], [13 x double]** [[CP0]] + // CK19-NO-USE-DAG: [[SEC0]] = getelementptr {{.+}}[13 x double]* [[SEC00:%[^,]+]], i[[Z]] 0 + // CK19-NO-USE-DAG: [[SEC00]] = getelementptr {{.+}}[13 x double]* [[VAR0]], i[[Z]] [[SEC000:%[^,]+]] + // CK19-NO-USE-DAG: [[SEC000]] = mul nsw i[[Z]] 0, %{{[^,]+}} + + // CK19-USE: call void [[CALL41:@.+]](i[[Z]] 11, i[[Z]] %{{[^,]+}}, [13 x double]* %{{[^,]+}}) + // CK19-NOUSE: call void [[CALL41:@.+]]() #pragma omp target map(mvlaas[:1][:2][:13]) { +#ifdef USE mvlaas[1][2][3]++; +#endif } // Region 42 @@ -2698,10 +3005,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC22222]] = getelementptr {{.*}}double*** [[SEC222222:[^,]+]], i{{.+}} 0 // CK19-DAG: [[SEC222222]] = load double***, double**** [[PTR]], - // CK19: call void [[CALL42:@.+]](double*** {{[^,]+}}) + // CK19-USE: call void [[CALL42:@.+]](double*** {{[^,]+}}) + // CK19-NOUSE: call void [[CALL42:@.+]]() #pragma omp target map(mptras[:1][2][:13]) { +#ifdef USE mptras[1][2][3]++; +#endif } // Region 43 - the memory is not contiguous for this map - will map the whole last dimension. 
@@ -2723,10 +3033,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: [[SEC00]] = getelementptr {{.+}}[11 x [12 x [13 x double]]]* [[VAR0]], i[[Z]] 0, i[[Z]] 1 // CK19-DAG: [[CSVAL0]] = {{mul nuw i64 %[^,]+, 104|sext i32 .+ to i64}} - // CK19: call void [[CALL43:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}}) + // CK19-USE: call void [[CALL43:@.+]]([11 x [12 x [13 x double]]]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL43:@.+]]() #pragma omp target map(marras[1][:ii][1:]) { +#ifdef USE marras[1][2][3]++; +#endif } // Region 44 @@ -2742,10 +3055,13 @@ void explicit_maps_single (int ii){ // CK19-DAG: store i32* [[SEC0:%[^,]+]], i32** [[CP0]] // CK19-DAG: [[SEC0]] = getelementptr {{.*}}[100 x i32]* [[VAR0]], i{{.+}} 0, i{{.+}} 20 - // CK19: call void [[CALL44:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-USE: call void [[CALL44:@.+]]([100 x i32]* {{[^,]+}}) + // CK19-NOUSE: call void [[CALL44:@.+]]() #pragma omp target map(from:arra[20:]) { +#ifdef USE arra[50]++; +#endif } } @@ -2926,12 +3242,26 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f #endif ///==========================================================================/// -// RUN: %clang_cc1 -DCK21 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CK21 --check-prefix CK21-64 +// RUN: %clang_cc1 -DUSE -DCK21 -verify -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK21,CK21-64,CK21-USE +// RUN: %clang_cc1 -DUSE -DCK21 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -DUSE -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck 
-allow-deprecated-dag-overlap %s --check-prefixes=CK21,CK21-64,CK21-USE +// RUN: %clang_cc1 -DUSE -DCK21 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK21,CK21-32,CK21-USE +// RUN: %clang_cc1 -DUSE -DCK21 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -DUSE -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK21,CK21-32,CK21-USE + +// RUN: %clang_cc1 -DUSE -DCK21 -verify -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY20 %s +// RUN: %clang_cc1 -DUSE -DCK21 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -DUSE -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY20 %s +// RUN: %clang_cc1 -DUSE -DCK21 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY20 %s +// RUN: %clang_cc1 -DUSE -DCK21 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s +// RUN: %clang_cc1 -DUSE -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY20 %s + +// RUN: %clang_cc1 -DCK21 -verify -fopenmp 
-fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK21,CK21-64,CK21-NOUSE // RUN: %clang_cc1 -DCK21 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CK21 --check-prefix CK21-64 -// RUN: %clang_cc1 -DCK21 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CK21 --check-prefix CK21-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK21,CK21-64,CK21-NOUSE +// RUN: %clang_cc1 -DCK21 -verify -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK21,CK21-32,CK21-NOUSE // RUN: %clang_cc1 -DCK21 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s -// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefix CK21 --check-prefix CK21-32 +// RUN: %clang_cc1 -fopenmp -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap %s --check-prefixes=CK21,CK21-32,CK21-NOUSE // RUN: %clang_cc1 -DCK21 -verify -fopenmp-simd 
-fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY20 %s // RUN: %clang_cc1 -DCK21 -fopenmp-simd -fopenmp-targets=powerpc64le-ibm-linux-gnu -x c++ -std=c++11 -triple powerpc64le-unknown-unknown -emit-pch -o %t %s @@ -2939,6 +3269,7 @@ void explicit_maps_references_and_function_args (int a, float b, int (&c)[10], f // RUN: %clang_cc1 -DCK21 -verify -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -emit-llvm %s -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY20 %s // RUN: %clang_cc1 -DCK21 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -std=c++11 -triple i386-unknown-unknown -emit-pch -o %t %s // RUN: %clang_cc1 -fopenmp-simd -fopenmp-targets=i386-pc-linux-gnu -x c++ -triple i386-unknown-unknown -std=c++11 -include-pch %t -verify %s -emit-llvm -o - | FileCheck -allow-deprecated-dag-overlap --check-prefix SIMD-ONLY20 %s + // SIMD-ONLY20-NOT: {{__kmpc|__tgt}} #ifdef CK21 // CK21: [[ST:%.+]] = type { i32, i32, float* } @@ -3002,10 +3333,13 @@ struct CC { // CK21-DAG: store i64 {{.+}}, i64* [[S1]] // CK21-DAG: [[SEC1]] = getelementptr {{.*}}[[ST]]* [[VAR1:%.+]], i{{.+}} 0, i{{.+}} 0 - // CK21: call void [[CALL00:@.+]]([[ST]]* {{[^,]+}}) + // CK21-USE: call void [[CALL00:@.+]]([[ST]]* {{[^,]+}}) + // CK21-NOUSE: call void [[CALL00:@.+]]() #pragma omp target map(A) { +#ifdef USE A += 1; +#endif } // Region 01 @@ -3023,10 +3357,13 @@ struct CC { // CK21-DAG: [[SEC0]] = getelementptr {{.*}}i32* [[RVAR00:%.+]], i{{.+}} 0 // CK21-DAG: [[RVAR00]] = load i32*, i32** [[VAR0]] - // CK21: call void [[CALL01:@.+]](i32* {{[^,]+}}) + // CK21-USE: call void [[CALL01:@.+]](i32* {{[^,]+}}) + // CK21-NOUSE: call void [[CALL01:@.+]]() #pragma omp target map(lb[:X]) { +#ifdef USE lb[4] += 1; +#endif } // Region 02 @@ -3057,10 +3394,13 @@ struct CC { // CK21-DAG: [[RVAR1]] = load float*, float** 
[[SEC1_:%[^,]+]] // CK21-DAG: [[SEC1_]] = getelementptr {{.*}}[[ST]]* [[VAR0]], i{{.+}} 0, i{{.+}} 2 - // CK21: call void [[CALL02:@.+]]([[ST]]* {{[^,]+}}) + // CK21-USE: call void [[CALL02:@.+]]([[ST]]* {{[^,]+}}) + // CK21-NOUSE: call void [[CALL02:@.+]]() #pragma omp target map(from:B[X:X+2]) { +#ifdef USE B[2] += 1.0f; +#endif } // Region 03 @@ -3075,10 +3415,13 @@ struct CC { // CK21-DAG: store [123 x float]* [[VAR0:%.+]], [123 x float]** [[CBP0]] // CK21-DAG: store [123 x float]* [[VAR0]], [123 x float]** [[CP0]] - // CK21: call void [[CALL03:@.+]]([123 x float]* {{[^,]+}}) + // CK21-USE: call void [[CALL03:@.+]]([123 x float]* {{[^,]+}}) + // CK21-NOUSE: call void [[CALL03:@.+]]() #pragma omp target map(from:la) { +#ifdef USE la[3] += 1.0f; +#endif } // Region 04 @@ -3093,10 +3436,13 @@ struct CC { // CK21-DAG: store i32* [[VAR0:%.+]], i32** [[CBP0]] // CK21-DAG: store i32* [[VAR0]], i32** [[CP0]] - // CK21: call void [[CALL04:@.+]](i32* {{[^,]+}}) + // CK21-USE: call void [[CALL04:@.+]](i32* {{[^,]+}}) + // CK21-NOUSE: call void [[CALL04:@.+]]() #pragma omp target map(from:arg) { +#ifdef USE arg +=1; +#endif } // Make sure the extra flag is passed to the second map. 
@@ -3135,11 +3481,14 @@ struct CC { // CK21-DAG: store i64 {{.+}}, i64* [[S2]] // CK21-DAG: [[SEC2]] = getelementptr {{.*}}[[ST]]* [[VAR2]], i{{.+}} 0, i{{.+}} 1 - // CK21: call void [[CALL05:@.+]]([[ST]]* {{[^,]+}}) + // CK21-USE: call void [[CALL05:@.+]]([[ST]]* {{[^,]+}}) + // CK21-NOUSE: call void [[CALL05:@.+]]() #pragma omp target map(A, A2) { +#ifdef USE A += 1; A2 += 1; +#endif } return A; } diff --git a/clang/test/OpenMP/target_teams_map_codegen.cpp b/clang/test/OpenMP/target_teams_map_codegen.cpp index 00d2839781075..328e45a652a91 100644 --- a/clang/test/OpenMP/target_teams_map_codegen.cpp +++ b/clang/test/OpenMP/target_teams_map_codegen.cpp @@ -20,15 +20,16 @@ #ifndef HEADER #define HEADER +// HOST: @[[MAPTYPES_PRIVATE:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 35, i64 35] // HOST: @[[MAPTYPES_FIRSTPRIVATE:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 35, i64 35] // HOST: @[[MAPTYPES_REDUCTION:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 35, i64 35] // HOST: @[[MAPTYPES_FROM:.offload_maptypes[0-9.]*]] = private {{.*}}constant [1 x i64] [i64 34] // HOST: @[[MAPTYPES_TO:.offload_maptypes[0-9.]*]] = private {{.*}}constant [1 x i64] [i64 33] // HOST: @[[MAPTYPES_ALLOC:.offload_maptypes[0-9.]*]] = private {{.*}}constant [1 x i64] [i64 32] -// HOST: @[[MAPTYPES_ARRAY_R0:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 35, i64 35] -// HOST: @[[MAPTYPES_ARRAY_R1:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 33, i64 33] -// HOST-INT128: @[[MAPTYPES_INT128_R0:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 35, i64 35] -// HOST-INT128: @[[MAPTYPES_INT128_R1:.offload_maptypes[0-9.]*]] = private {{.*}}constant [2 x i64] [i64 34, i64 34] +// HOST: @[[MAPTYPES_ARRAY_R0:.offload_maptypes[0-9.]*]] = private {{.*}}constant [3 x i64] [i64 35, i64 35, i64 35] +// HOST: @[[MAPTYPES_ARRAY_R1:.offload_maptypes[0-9.]*]] = private {{.*}}constant [3 x i64] 
[i64 33, i64 33, i64 33] +// HOST-INT128: @[[MAPTYPES_INT128_R0:.offload_maptypes[0-9.]*]] = private {{.*}}constant [3 x i64] [i64 35, i64 35, i64 35] +// HOST-INT128: @[[MAPTYPES_INT128_R1:.offload_maptypes[0-9.]*]] = private {{.*}}constant [3 x i64] [i64 34, i64 34, i64 34] // // CHECK: @.omp_offloading.entry_name{{[0-9.]*}} = {{.*}} c"[[OFFLOAD_PRIVATE:__omp_offloading_[^"\\]*mapWithPrivate[^"\\]*]]\00" // CHECK: @.omp_offloading.entry_name{{[0-9.]*}} = {{.*}} c"[[OFFLOAD_FIRSTPRIVATE:__omp_offloading_[^"\\]*mapWithFirstprivate[^"\\]*]]\00" @@ -42,9 +43,7 @@ // INT128: @.omp_offloading.entry_name{{[0-9.]*}} = {{.*}} c"[[OFFLOAD_INT128_R1:__omp_offloading_[^"\\]*mapInt128[^"\\]*]]\00" // HOST: define {{.*}}mapWithPrivate -// HOST: call {{.*}} @.[[OFFLOAD_PRIVATE]].region_id -// HOST-NOT: offload_maptypes -// HOST-SAME: {{$}} +// HOST: call {{.*}} @.[[OFFLOAD_PRIVATE]].region_id{{.*}} @[[MAPTYPES_PRIVATE]] // // CHECK: define {{.*}} void @[[OFFLOAD_PRIVATE]]() // CHECK: call void ({{.*}}@[[OUTLINE_PRIVATE:.omp_outlined.[.0-9]*]] From 6d247f980d922055018137c0d1400fde83d9449f Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Wed, 15 Jul 2020 04:03:35 -0400 Subject: [PATCH 701/771] [SCEV][IndVarSimplify] insert point should not be block front. Recommit after removing the unused cast instructions. 
Differential Revision: https://reviews.llvm.org/D80975 --- llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 15 ++++++++++-- .../Utils/ScalarEvolutionExpander.cpp | 14 +++++------ .../IndVarSimplify/widen-i32-i8ptr.ll | 24 +++++++++++++++++++ 3 files changed, 43 insertions(+), 10 deletions(-) create mode 100644 llvm/test/Transforms/IndVarSimplify/widen-i32-i8ptr.ll diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp index 0f36c3f772e65..51d12faf712ad 100644 --- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -1435,8 +1435,19 @@ PHINode *WidenIV::createWideIV(SCEVExpander &Rewriter) { // either find an existing phi or materialize a new one. Either way, we // expect a well-formed cyclic phi-with-increments. i.e. any operand not part // of the phi-SCC dominates the loop entry. - Instruction *InsertPt = &L->getHeader()->front(); - WidePhi = cast(Rewriter.expandCodeFor(AddRec, WideType, InsertPt)); + Instruction *InsertPt = &*L->getHeader()->getFirstInsertionPt(); + Value *ExpandInst = Rewriter.expandCodeFor(AddRec, WideType, InsertPt); + // If the wide phi is not a phi node, for example a cast node, like bitcast, + // inttoptr, ptrtoint, just skip for now. + if (!(WidePhi = dyn_cast(ExpandInst))) { + // if the cast node is an inserted instruction without any user, we should + // remove it to make sure the pass don't touch the function as we can not + // wide the phi. + if (ExpandInst->hasNUses(0) && + Rewriter.isInsertedInstruction(cast(ExpandInst))) + DeadInsts.emplace_back(ExpandInst); + return nullptr; + } // Remembering the WideIV increment generated by SCEVExpander allows // widenIVUse to reuse it when widening the narrow IV's increment. 
We don't diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp index 71b48482f26aa..c54ae26b53234 100644 --- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp +++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp @@ -1292,7 +1292,8 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, if (useSubtract) Step = SE.getNegativeSCEV(Step); // Expand the step somewhere that dominates the loop header. - Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); + Value *StepV = expandCodeFor(Step, IntTy, + &*L->getHeader()->getFirstInsertionPt()); // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if // we actually do emit an addition. It does not apply if we emit a @@ -1438,7 +1439,8 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { { // Expand the step somewhere that dominates the loop header. SCEVInsertPointGuard Guard(Builder, this); - StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front()); + StepV = expandCodeFor(Step, IntTy, + &*L->getHeader()->getFirstInsertionPt()); } Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); } @@ -1870,11 +1872,6 @@ Value *SCEVExpander::expand(const SCEV *S) { } } - // IndVarSimplify sometimes sets the insertion point at the block start, even - // when there are PHIs at that point. We must correct for this. - if (isa(*InsertPt)) - InsertPt = &*InsertPt->getParent()->getFirstInsertionPt(); - // Check to see if we already expanded this here. auto I = InsertedExpressions.find(std::make_pair(S, InsertPt)); if (I != InsertedExpressions.end()) @@ -1945,7 +1942,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, // Emit code for it. 
SCEVInsertPointGuard Guard(Builder, this); PHINode *V = - cast(expandCodeFor(H, nullptr, &L->getHeader()->front())); + cast(expandCodeFor(H, nullptr, + &*L->getHeader()->getFirstInsertionPt())); return V; } diff --git a/llvm/test/Transforms/IndVarSimplify/widen-i32-i8ptr.ll b/llvm/test/Transforms/IndVarSimplify/widen-i32-i8ptr.ll new file mode 100644 index 0000000000000..80191d4e5b771 --- /dev/null +++ b/llvm/test/Transforms/IndVarSimplify/widen-i32-i8ptr.ll @@ -0,0 +1,24 @@ +; RUN: opt < %s -indvars -S | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" + +define dso_local void @Widen_i32_i8ptr() local_unnamed_addr { +; CHECK-LABEL: @Widen_i32_i8ptr( +; CHECK: phi i8* +; CHECK: phi i32 +entry: + %ptrids = alloca [15 x i8*], align 8 + %arraydecay2032 = getelementptr inbounds [15 x i8*], [15 x i8*]* %ptrids, i64 0, i64 0 + store i8** %arraydecay2032, i8*** inttoptr (i64 8 to i8***), align 8 + br label %for.cond2106 + +for.cond2106: ; preds = %for.cond2106, %entry + %gid.0 = phi i8* [ null, %entry ], [ %incdec.ptr, %for.cond2106 ] + %i.0 = phi i32 [ 0, %entry ], [ %inc2117, %for.cond2106 ] + %incdec.ptr = getelementptr inbounds i8, i8* %gid.0, i64 1 + %idxprom2114 = zext i32 %i.0 to i64 + %arrayidx2115 = getelementptr inbounds [15 x i8*], [15 x i8*]* %ptrids, i64 0, i64 %idxprom2114 + store i8* %gid.0, i8** %arrayidx2115, align 8 + %inc2117 = add nuw nsw i32 %i.0, 1 + br label %for.cond2106 +} From bb07eb944f9b9ca7b3d8ac786c92e1f5bf59e3e1 Mon Sep 17 00:00:00 2001 From: Chen Zheng Date: Fri, 17 Jul 2020 22:51:00 -0400 Subject: [PATCH 702/771] [PowerPC]add testcase for adding store (load float*) pattern, nfc --- .../SimplifyCFG/PowerPC/prefer-load-i32.ll | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) create mode 100644 llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll diff --git a/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll new file mode 100644 index 
0000000000000..943fcba57c654 --- /dev/null +++ b/llvm/test/Transforms/SimplifyCFG/PowerPC/prefer-load-i32.ll @@ -0,0 +1,33 @@ +; RUN: opt < %s -mtriple=powerpc64le-unknown-linux-gnu -simplifycfg -S | \ +; RUN: FileCheck %s + +define float @foo(float* %src, float* %dest, i32 signext %count, i32 signext %cond) { +; CHECK-LABEL: @foo( +; CHECK-LABEL: entry: +; CHECK: %0 = load float, float* %arrayidx, align 4 +entry: + %cmp = icmp sgt i32 %cond, 10 + %idxprom = sext i32 %count to i64 + %arrayidx = getelementptr inbounds float, float* %src, i64 %idxprom + br i1 %cmp, label %if.then, label %if.else + +; CHECK-LABEL: if.then: +; CHECK-NOT: load float +if.then: ; preds = %entry + %0 = load float, float* %arrayidx, align 4 + %res = fmul float %0, 3.000000e+00 + br label %if.end + +; CHECK-LABEL: if.else: +; CHECK-NOT: load float +if.else: ; preds = %entry + %1 = load float, float* %arrayidx, align 4 + %idxprom3 = sext i32 %count to i64 + %arrayidx4 = getelementptr inbounds float, float* %dest, i64 %idxprom3 + store float %1, float* %arrayidx4, align 4 + br label %if.end + +if.end: ; preds = %if.else, %if.then + %res2.0 = phi float [ %res, %if.then ], [ 0.000000e+00, %if.else ] + ret float %res2.0 +} From 780528d9da707b15849d6c9711cc3ab19f6c7f00 Mon Sep 17 00:00:00 2001 From: Gui Andrade Date: Sat, 18 Jul 2020 03:09:39 +0000 Subject: [PATCH 703/771] sanitizers: Implement sig{and,or}set interceptors Also adds a sanitizers-wide test, and a msan test, for these functions. 
--- .../sanitizer_common_interceptors.inc | 28 +++++++ .../sanitizer_platform_interceptors.h | 1 + compiler-rt/test/msan/Linux/sigandorset.cpp | 27 +++++++ .../TestCases/Linux/signal_send.cpp | 75 +++++++++++++++++++ 4 files changed, 131 insertions(+) create mode 100644 compiler-rt/test/msan/Linux/sigandorset.cpp create mode 100644 compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc index d7e0bba762941..0fdaf00e67c17 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc +++ b/compiler-rt/lib/sanitizer_common/sanitizer_common_interceptors.inc @@ -4085,6 +4085,33 @@ INTERCEPTOR(int, sigfillset, __sanitizer_sigset_t *set) { #define INIT_SIGSETOPS #endif +#if SANITIZER_INTERCEPT_SIGSET_LOGICOPS +INTERCEPTOR(int, sigandset, __sanitizer_sigset_t *dst, __sanitizer_sigset_t *src1, __sanitizer_sigset_t *src2) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, sigandset, dst, src1, src2); + if (src1) COMMON_INTERCEPTOR_READ_RANGE(ctx, src1, sizeof(*src1)); + if (src2) COMMON_INTERCEPTOR_READ_RANGE(ctx, src2, sizeof(*src2)); + int res = REAL(sigandset)(dst, src1, src2); + if (!res && dst) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(*dst)); + return res; +} + +INTERCEPTOR(int, sigorset, __sanitizer_sigset_t *dst, __sanitizer_sigset_t *src1, __sanitizer_sigset_t *src2) { + void *ctx; + COMMON_INTERCEPTOR_ENTER(ctx, sigorset, dst, src1, src2); + if (src1) COMMON_INTERCEPTOR_READ_RANGE(ctx, src1, sizeof(*src1)); + if (src2) COMMON_INTERCEPTOR_READ_RANGE(ctx, src2, sizeof(*src2)); + int res = REAL(sigorset)(dst, src1, src2); + if (!res && dst) COMMON_INTERCEPTOR_WRITE_RANGE(ctx, dst, sizeof(*dst)); + return res; +} +#define INIT_SIGSET_LOGICOPS \ + COMMON_INTERCEPT_FUNCTION(sigandset); \ + COMMON_INTERCEPT_FUNCTION(sigorset); +#else +#define INIT_SIGSET_LOGICOPS +#endif + #if 
SANITIZER_INTERCEPT_SIGPENDING INTERCEPTOR(int, sigpending, __sanitizer_sigset_t *set) { void *ctx; @@ -9996,6 +10023,7 @@ static void InitializeCommonInterceptors() { INIT_SIGWAITINFO; INIT_SIGTIMEDWAIT; INIT_SIGSETOPS; + INIT_SIGSET_LOGICOPS; INIT_SIGPENDING; INIT_SIGPROCMASK; INIT_PTHREAD_SIGMASK; diff --git a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h index e28bb937ae83d..04b61d6daae78 100644 --- a/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h +++ b/compiler-rt/lib/sanitizer_common/sanitizer_platform_interceptors.h @@ -333,6 +333,7 @@ #define SANITIZER_INTERCEPT_SIGTIMEDWAIT SI_LINUX_NOT_ANDROID || SI_SOLARIS #define SANITIZER_INTERCEPT_SIGSETOPS \ (SI_FREEBSD || SI_NETBSD || SI_MAC || SI_LINUX_NOT_ANDROID || SI_SOLARIS) +#define SANITIZER_INTERCEPT_SIGSET_LOGICOPS SI_LINUX_NOT_ANDROID #define SANITIZER_INTERCEPT_SIGPENDING SI_POSIX #define SANITIZER_INTERCEPT_SIGPROCMASK SI_POSIX #define SANITIZER_INTERCEPT_PTHREAD_SIGMASK SI_POSIX diff --git a/compiler-rt/test/msan/Linux/sigandorset.cpp b/compiler-rt/test/msan/Linux/sigandorset.cpp new file mode 100644 index 0000000000000..d0e552fcfa019 --- /dev/null +++ b/compiler-rt/test/msan/Linux/sigandorset.cpp @@ -0,0 +1,27 @@ +// RUN: %clangxx_msan -std=c++11 -O0 -g %s -o %t && not %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_msan -DLEFT_OK -std=c++11 -O0 -g %s -o %t && not %run %t 2>&1 | FileCheck %s +// RUN: %clangxx_msan -DRIGHT_OK -std=c++11 -O0 -g %s -o %t && not %run %t 2<&1 | FileCheck %s +// RUN: %clangxx_msan -DLEFT_OK -DRIGHT_OK -std=c++11 -O0 -g %s -o %t && %run %t + +#include +#include +#include +#include +#include + +int main(void) { + sigset_t s, t, u; +#ifdef LEFT_OK + sigemptyset(&t); +#endif +#ifdef RIGHT_OK + sigemptyset(&u); +#endif + + // CHECK: MemorySanitizer: use-of-uninitialized-value + // CHECK-NEXT: in main {{.*}}sigandorset.cpp:[[@LINE+1]] + sigandset(&s, &t, &u); + sigorset(&s, 
&t, &u); + __msan_check_mem_is_initialized(&s, sizeof s); + return 0; +} diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp new file mode 100644 index 0000000000000..54014da8b532a --- /dev/null +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp @@ -0,0 +1,75 @@ +// RUN: %clangxx -std=c++11 -O0 -g %s -o %t && %run %t 2>&1 | FileCheck %s + +#include +#include +#include +#include +#include +#include + +sigset_t mkset(int n, ...) { + sigset_t s; + int res = 0; + res |= sigemptyset(&s); + va_list va; + va_start(va, n); + while (n--) { + res |= sigaddset(&s, va_arg(va, int)); + } + va_end(va); + assert(!res); + return s; +} + +sigset_t sigset_or(sigset_t first, sigset_t second) { + sigset_t out; + int res = sigorset(&out, &first, &second); + assert(!res); + return out; +} + +sigset_t sigset_and(sigset_t first, sigset_t second) { + sigset_t out; + int res = sigandset(&out, &first, &second); + assert(!res); + return out; +} + +int fork_and_signal(sigset_t s) { + if (pid_t pid = fork()) { + kill(pid, SIGUSR1); + kill(pid, SIGUSR2); + int child_stat; + wait(&child_stat); + return !WIFEXITED(child_stat); + } else { + int sig; + int res = sigwait(&s, &sig); + assert(!res); + fprintf(stderr, "died with sig %d\n", sig); + _exit(0); + } +} + +void test_sigwait() { + // test sigorset... s should now contain SIGUSR1 | SIGUSR2 + sigset_t s = sigset_or(mkset(1, SIGUSR1), mkset(1, SIGUSR2)); + sigprocmask(SIG_BLOCK, &s, 0); + int res; + res = fork_and_signal(s); + fprintf(stderr, "fork_and_signal with SIGUSR1,2: %d\n", res); + // CHECK: died with sig 10 + // CHECK: fork_and_signal with SIGUSR1,2: 0 + + // test sigandset... 
s should only have SIGUSR2 now + s = sigset_and(s, mkset(1, SIGUSR2)); + res = fork_and_signal(s); + fprintf(stderr, "fork_and_signal with SIGUSR2: %d\n", res); + // CHECK: died with sig 12 + // CHECK: fork_and_signal with SIGUSR2: 0 +} + +int main(void) { + test_sigwait(); + return 0; +} From c42509413f69667361e93b5834fe368bfa4a6e82 Mon Sep 17 00:00:00 2001 From: Gui Andrade Date: Sat, 18 Jul 2020 03:18:48 +0000 Subject: [PATCH 704/771] [LLVM] Add libatomic load/store functions to TargetLibraryInfo This allows treating these functions like libcalls. This patch is a prerequisite to instrumenting them in MSAN: https://reviews.llvm.org/D83337 Differential Revision: https://reviews.llvm.org/D83361 --- llvm/include/llvm/Analysis/TargetLibraryInfo.def | 6 ++++++ llvm/lib/Analysis/TargetLibraryInfo.cpp | 9 +++++++++ llvm/unittests/Analysis/TargetLibraryInfoTest.cpp | 3 +++ 3 files changed, 18 insertions(+) diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.def b/llvm/include/llvm/Analysis/TargetLibraryInfo.def index 3864d4955104b..36b39f4a0e231 100644 --- a/llvm/include/llvm/Analysis/TargetLibraryInfo.def +++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.def @@ -262,6 +262,12 @@ TLI_DEFINE_STRING_INTERNAL("__atanhf_finite") /// long double __atanhl_finite(long double x); TLI_DEFINE_ENUM_INTERNAL(atanhl_finite) TLI_DEFINE_STRING_INTERNAL("__atanhl_finite") +/// void __atomic_load(size_t size, void *mptr, void *vptr, int smodel); +TLI_DEFINE_ENUM_INTERNAL(atomic_load) +TLI_DEFINE_STRING_INTERNAL("__atomic_load") +/// void __atomic_store(size_t size, void *mptr, void *vptr, int smodel); +TLI_DEFINE_ENUM_INTERNAL(atomic_store) +TLI_DEFINE_STRING_INTERNAL("__atomic_store") /// double __cosh_finite(double x); TLI_DEFINE_ENUM_INTERNAL(cosh_finite) TLI_DEFINE_STRING_INTERNAL("__cosh_finite") diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp index 60cfb04634c4a..0b465d3c31773 100644 --- a/llvm/lib/Analysis/TargetLibraryInfo.cpp 
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp @@ -1228,6 +1228,15 @@ bool TargetLibraryInfoImpl::isValidProtoForLibFunc(const FunctionType &FTy, case LibFunc_ZdaPvmSt11align_val_t: return (NumParams == 3 && FTy.getParamType(0)->isPointerTy()); + // void __atomic_load(size_t, void *, void *, int) + case LibFunc_atomic_load: + // void __atomic_store(size_t, void *, void *, int) + case LibFunc_atomic_store: + return (NumParams == 4 && FTy.getParamType(0)->isIntegerTy() && + FTy.getParamType(1)->isPointerTy() && + FTy.getParamType(2)->isPointerTy() && + FTy.getParamType(3)->isIntegerTy()); + case LibFunc_memset_pattern16: return (!FTy.isVarArg() && NumParams == 3 && FTy.getParamType(0)->isPointerTy() && diff --git a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp index 83637596282e2..db75192640def 100644 --- a/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp +++ b/llvm/unittests/Analysis/TargetLibraryInfoTest.cpp @@ -495,6 +495,9 @@ TEST_F(TargetLibraryInfoTest, ValidProto) { "declare i8* @mempcpy(i8*, i8*, i64)\n" "declare i8* @memrchr(i8*, i32, i64)\n" + "declare void @__atomic_load(i64, i8*, i8*, i32)\n" + "declare void @__atomic_store(i64, i8*, i8*, i32)\n" + // These are similar to the FILE* fgetc/fputc. 
"declare i32 @_IO_getc(%struct*)\n" "declare i32 @_IO_putc(i32, %struct*)\n" From 1f29171ae77f81cacea32808b67d7ae62da23e0c Mon Sep 17 00:00:00 2001 From: Gui Andrade Date: Sat, 18 Jul 2020 03:39:14 +0000 Subject: [PATCH 705/771] update libatomic instrumentation --- compiler-rt/test/msan/libatomic.c | 37 ++++++ .../Instrumentation/MemorySanitizer.cpp | 113 ++++++++++++++++++ .../MemorySanitizer/libatomic.ll | 70 +++++++++++ 3 files changed, 220 insertions(+) create mode 100644 compiler-rt/test/msan/libatomic.c create mode 100644 llvm/test/Instrumentation/MemorySanitizer/libatomic.ll diff --git a/compiler-rt/test/msan/libatomic.c b/compiler-rt/test/msan/libatomic.c new file mode 100644 index 0000000000000..b230a0972c6db --- /dev/null +++ b/compiler-rt/test/msan/libatomic.c @@ -0,0 +1,37 @@ +// RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -latomic -DTEST_STORE -O0 %s -o %t && %run %t 2>&1 +// RUN: %clangxx_msan -fsanitize-memory-track-origins=0 -latomic -DTEST_LOAD -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK +// RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -latomic -DTEST_LOAD -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SHADOW + +#include +#include + +typedef struct __attribute((packed)) { + uint8_t val[3]; +} i24; + +void copy(i24 *dst, i24 *src); + +int main() { + i24 uninit; + i24 init = {0}; + + __msan_check_mem_is_initialized(&init, 3); + copy(&init, &uninit); + __msan_check_mem_is_initialized(&init, 3); +} + +void copy(i24 *dst, i24 *src) { +#ifdef TEST_LOAD + __atomic_load(src, dst, __ATOMIC_RELAXED); + + // CHECK: MemorySanitizer: use-of-uninitialized-value + // CHECK: #0 {{0x[a-f0-9]+}} in main{{.*}}libatomic.c:[[@LINE-8]] + + // CHECK-SHADOW: Uninitialized value was stored to memory at + // CHECK-SHADOW: #0 {{0x[a-f0-9]+}} in copy{{.*}}libatomic.c:[[@LINE-6]] +#endif +#ifdef TEST_STORE + // Store always writes a clean shadow + __atomic_store(src, dst, __ATOMIC_RELAXED); 
+#endif +} diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index fcf7f470b3e10..0001559c4051d 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -572,6 +572,9 @@ class MemorySanitizer { /// uninitialized value and returns an updated origin id encoding this info. FunctionCallee MsanChainOriginFn; + /// Run-time helper that paints an origin over a region. + FunctionCallee MsanSetOriginFn; + /// MSan runtime replacements for memmove, memcpy and memset. FunctionCallee MemmoveFn, MemcpyFn, MemsetFn; @@ -850,6 +853,9 @@ void MemorySanitizer::initializeCallbacks(Module &M) { // instrumentation. MsanChainOriginFn = M.getOrInsertFunction( "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty()); + MsanSetOriginFn = + M.getOrInsertFunction("__msan_set_origin", IRB.getVoidTy(), + IRB.getInt8PtrTy(), IntptrTy, IRB.getInt32Ty()); MemmoveFn = M.getOrInsertFunction( "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy); @@ -1769,6 +1775,24 @@ struct MemorySanitizerVisitor : public InstVisitor { llvm_unreachable("Unknown ordering"); } + Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) { + constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1; + uint32_t OrderingTable[NumOrderings] = {}; + + OrderingTable[(int)AtomicOrderingCABI::relaxed] = + OrderingTable[(int)AtomicOrderingCABI::release] = + (int)AtomicOrderingCABI::release; + OrderingTable[(int)AtomicOrderingCABI::consume] = + OrderingTable[(int)AtomicOrderingCABI::acquire] = + OrderingTable[(int)AtomicOrderingCABI::acq_rel] = + (int)AtomicOrderingCABI::acq_rel; + OrderingTable[(int)AtomicOrderingCABI::seq_cst] = + (int)AtomicOrderingCABI::seq_cst; + + return ConstantDataVector::get(IRB.getContext(), + makeArrayRef(OrderingTable, NumOrderings)); + } + AtomicOrdering addAcquireOrdering(AtomicOrdering a) { switch (a) { 
case AtomicOrdering::NotAtomic: @@ -1786,6 +1810,24 @@ struct MemorySanitizerVisitor : public InstVisitor { llvm_unreachable("Unknown ordering"); } + Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) { + constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1; + uint32_t OrderingTable[NumOrderings] = {}; + + OrderingTable[(int)AtomicOrderingCABI::relaxed] = + OrderingTable[(int)AtomicOrderingCABI::acquire] = + OrderingTable[(int)AtomicOrderingCABI::consume] = + (int)AtomicOrderingCABI::acquire; + OrderingTable[(int)AtomicOrderingCABI::release] = + OrderingTable[(int)AtomicOrderingCABI::acq_rel] = + (int)AtomicOrderingCABI::acq_rel; + OrderingTable[(int)AtomicOrderingCABI::seq_cst] = + (int)AtomicOrderingCABI::seq_cst; + + return ConstantDataVector::get(IRB.getContext(), + makeArrayRef(OrderingTable, NumOrderings)); + } + // ------------------- Visitors. using InstVisitor::visit; void visit(Instruction &I) { @@ -3404,6 +3446,60 @@ struct MemorySanitizerVisitor : public InstVisitor { } } + void visitLibAtomicLoad(CallBase &CB) { + IRBuilder<> IRB(&CB); + Value *Size = CB.getArgOperand(0); + Value *SrcPtr = CB.getArgOperand(1); + Value *DstPtr = CB.getArgOperand(2); + Value *Ordering = CB.getArgOperand(3); + // Convert the call to have at least Acquire ordering to make sure + // the shadow operations aren't reordered before it. 
+ Value *NewOrdering = + IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering); + CB.setArgOperand(3, NewOrdering); + + IRBuilder<> NextIRB(CB.getNextNode()); + NextIRB.SetCurrentDebugLocation(CB.getDebugLoc()); + + Value *SrcShadowPtr, *SrcOriginPtr; + std::tie(SrcShadowPtr, SrcOriginPtr) = + getShadowOriginPtr(SrcPtr, NextIRB, NextIRB.getInt8Ty(), Align(1), + /*isStore*/ false); + Value *DstShadowPtr = + getShadowOriginPtr(DstPtr, NextIRB, NextIRB.getInt8Ty(), Align(1), + /*isStore*/ true) + .first; + + NextIRB.CreateMemCpy(DstShadowPtr, Align(1), SrcShadowPtr, Align(1), Size); + if (MS.TrackOrigins) { + Value *SrcOrigin = NextIRB.CreateAlignedLoad(MS.OriginTy, SrcOriginPtr, + kMinOriginAlignment); + Value *NewOrigin = updateOrigin(SrcOrigin, NextIRB); + NextIRB.CreateCall(MS.MsanSetOriginFn, {DstPtr, Size, NewOrigin}); + } + } + + void visitLibAtomicStore(CallBase &CB) { + IRBuilder<> IRB(&CB); + Value *Size = CB.getArgOperand(0); + Value *DstPtr = CB.getArgOperand(2); + Value *Ordering = CB.getArgOperand(3); + // Convert the call to have at least Release ordering to make sure + // the shadow operations aren't reordered after it. + Value *NewOrdering = + IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering); + CB.setArgOperand(3, NewOrdering); + + Value *DstShadowPtr = + getShadowOriginPtr(DstPtr, IRB, IRB.getInt8Ty(), Align(1), + /*isStore*/ true) + .first; + + // Atomic store always paints clean shadow/origin. See file header. + IRB.CreateMemSet(DstShadowPtr, getCleanShadow(IRB.getInt8Ty()), Size, + Align(1)); + } + void visitCallBase(CallBase &CB) { assert(!CB.getMetadata("nosanitize")); if (CB.isInlineAsm()) { @@ -3417,6 +3513,23 @@ struct MemorySanitizerVisitor : public InstVisitor { visitInstruction(CB); return; } + LibFunc LF; + if (TLI->getLibFunc(CB, LF)) { + // libatomic.a functions need to have special handling because there isn't + // a good way to intercept them or compile the library with + // instrumentation. 
+ switch (LF) { + case LibFunc_atomic_load: + visitLibAtomicLoad(CB); + return; + case LibFunc_atomic_store: + visitLibAtomicStore(CB); + return; + default: + break; + } + } + if (auto *Call = dyn_cast(&CB)) { assert(!isa(Call) && "intrinsics are handled elsewhere"); diff --git a/llvm/test/Instrumentation/MemorySanitizer/libatomic.ll b/llvm/test/Instrumentation/MemorySanitizer/libatomic.ll new file mode 100644 index 0000000000000..a2515740b4591 --- /dev/null +++ b/llvm/test/Instrumentation/MemorySanitizer/libatomic.ll @@ -0,0 +1,70 @@ +; RUN: opt < %s -msan-check-access-address=0 -S -passes=msan 2>&1 | FileCheck %s +; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=2 -S -passes=msan 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-ORIGIN +; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @__atomic_load(i64, i8*, i8*, i32) +declare void @__atomic_store(i64, i8*, i8*, i32) + +define i24 @odd_sized_load(i24* %ptr) sanitize_memory { +; CHECK: @odd_sized_load(i24* {{.*}}[[PTR:%.+]]) +; CHECK: [[VAL_PTR:%.*]] = alloca i24, align 1 +; CHECK-ORIGIN: @__msan_set_alloca_origin +; CHECK: [[VAL_PTR_I8:%.*]] = bitcast i24* [[VAL_PTR]] to i8* +; CHECK: [[PTR_I8:%.*]] = bitcast i24* [[PTR]] to i8* +; CHECK: call void @__atomic_load(i64 3, i8* [[PTR_I8]], i8* [[VAL_PTR_I8]], i32 2) + +; CHECK: ptrtoint i8* [[PTR_I8]] +; CHECK: xor +; CHECK: [[SPTR_I8:%.*]] = inttoptr +; CHECK-ORIGIN: add +; CHECK-ORIGIN: and +; CHECK-ORIGIN: [[OPTR:%.*]] = inttoptr + +; CHECK: ptrtoint i8* [[VAL_PTR_I8]] +; CHECK: xor +; CHECK: [[VAL_SPTR_I8:%.*]] = inttoptr +; CHECK-ORIGIN: add +; CHECK-ORIGIN: and +; CHECK-ORIGIN: [[VAL_OPTR:%.*]] = inttoptr + +; CHECK: call void @llvm.memcpy{{.*}}(i8* align 1 [[VAL_SPTR_I8]], i8* align 1 [[SPTR_I8]], i64 
3 + +; CHECK-ORIGIN: [[ARG_ORIGIN:%.*]] = load i32, i32* [[OPTR]] +; CHECK-ORIGIN: [[VAL_ORIGIN:%.*]] = call i32 @__msan_chain_origin(i32 [[ARG_ORIGIN]]) +; CHECK-ORIGIN: call void @__msan_set_origin(i8* [[VAL_PTR_I8]], i64 3, i32 [[VAL_ORIGIN]]) + +; CHECK: [[VAL:%.*]] = load i24, i24* [[VAL_PTR]] +; CHECK: ret i24 [[VAL]] + %val_ptr = alloca i24, align 1 + %val_ptr_i8 = bitcast i24* %val_ptr to i8* + %ptr_i8 = bitcast i24* %ptr to i8* + call void @__atomic_load(i64 3, i8* %ptr_i8, i8* %val_ptr_i8, i32 0) + %val = load i24, i24* %val_ptr + ret i24 %val +} + +define void @odd_sized_store(i24* %ptr, i24 %val) sanitize_memory { +; CHECK: @odd_sized_store(i24* {{.*}}[[PTR:%.+]], i24 {{.*}}[[VAL:%.+]]) +; CHECK: [[VAL_PTR:%.*]] = alloca i24, align 1 +; CHECK: store i24 [[VAL]], i24* [[VAL_PTR]] +; CHECK: [[VAL_PTR_I8:%.*]] = bitcast i24* [[VAL_PTR]] to i8* +; CHECK: [[PTR_I8:%.*]] = bitcast i24* [[PTR]] to i8* + +; CHECK: ptrtoint i8* [[PTR_I8]] +; CHECK: xor +; CHECK: [[SPTR_I8:%.*]] = inttoptr +; CHECK: call void @llvm.memset{{.*}}(i8* align 1 [[SPTR_I8]], i8 0, i64 3 +; CHECK-ORIGIN: call void @__msan_set_origin(i8* [[PTR_I8]], i64 3, i32 0) + +; CHECK: call void @__atomic_store(i64 3, i8* [[VAL_PTR_I8]], i8* [[PTR_I8]], i32 3) +; CHECK: ret void + %val_ptr = alloca i24, align 1 + store i24 %val, i24* %val_ptr + %val_ptr_i8 = bitcast i24* %val_ptr to i8* + %ptr_i8 = bitcast i24* %ptr to i8* + call void @__atomic_store(i64 3, i8* %val_ptr_i8, i8* %ptr_i8, i32 0) + ret void +} + From 951584db4ffbc8e8cfe954134edfee9974e8ae28 Mon Sep 17 00:00:00 2001 From: Gui Andrade Date: Sat, 18 Jul 2020 03:53:00 +0000 Subject: [PATCH 706/771] Revert "update libatomic instrumentation" This was committed mistakenly. This reverts commit 1f29171ae77f81cacea32808b67d7ae62da23e0c. 
--- compiler-rt/test/msan/libatomic.c | 37 ------ .../Instrumentation/MemorySanitizer.cpp | 113 ------------------ .../MemorySanitizer/libatomic.ll | 70 ----------- 3 files changed, 220 deletions(-) delete mode 100644 compiler-rt/test/msan/libatomic.c delete mode 100644 llvm/test/Instrumentation/MemorySanitizer/libatomic.ll diff --git a/compiler-rt/test/msan/libatomic.c b/compiler-rt/test/msan/libatomic.c deleted file mode 100644 index b230a0972c6db..0000000000000 --- a/compiler-rt/test/msan/libatomic.c +++ /dev/null @@ -1,37 +0,0 @@ -// RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -latomic -DTEST_STORE -O0 %s -o %t && %run %t 2>&1 -// RUN: %clangxx_msan -fsanitize-memory-track-origins=0 -latomic -DTEST_LOAD -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK -// RUN: %clangxx_msan -fsanitize-memory-track-origins=2 -latomic -DTEST_LOAD -O0 %s -o %t && not %run %t 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SHADOW - -#include -#include - -typedef struct __attribute((packed)) { - uint8_t val[3]; -} i24; - -void copy(i24 *dst, i24 *src); - -int main() { - i24 uninit; - i24 init = {0}; - - __msan_check_mem_is_initialized(&init, 3); - copy(&init, &uninit); - __msan_check_mem_is_initialized(&init, 3); -} - -void copy(i24 *dst, i24 *src) { -#ifdef TEST_LOAD - __atomic_load(src, dst, __ATOMIC_RELAXED); - - // CHECK: MemorySanitizer: use-of-uninitialized-value - // CHECK: #0 {{0x[a-f0-9]+}} in main{{.*}}libatomic.c:[[@LINE-8]] - - // CHECK-SHADOW: Uninitialized value was stored to memory at - // CHECK-SHADOW: #0 {{0x[a-f0-9]+}} in copy{{.*}}libatomic.c:[[@LINE-6]] -#endif -#ifdef TEST_STORE - // Store always writes a clean shadow - __atomic_store(src, dst, __ATOMIC_RELAXED); -#endif -} diff --git a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp index 0001559c4051d..fcf7f470b3e10 100644 --- a/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp +++ 
b/llvm/lib/Transforms/Instrumentation/MemorySanitizer.cpp @@ -572,9 +572,6 @@ class MemorySanitizer { /// uninitialized value and returns an updated origin id encoding this info. FunctionCallee MsanChainOriginFn; - /// Run-time helper that paints an origin over a region. - FunctionCallee MsanSetOriginFn; - /// MSan runtime replacements for memmove, memcpy and memset. FunctionCallee MemmoveFn, MemcpyFn, MemsetFn; @@ -853,9 +850,6 @@ void MemorySanitizer::initializeCallbacks(Module &M) { // instrumentation. MsanChainOriginFn = M.getOrInsertFunction( "__msan_chain_origin", IRB.getInt32Ty(), IRB.getInt32Ty()); - MsanSetOriginFn = - M.getOrInsertFunction("__msan_set_origin", IRB.getVoidTy(), - IRB.getInt8PtrTy(), IntptrTy, IRB.getInt32Ty()); MemmoveFn = M.getOrInsertFunction( "__msan_memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy); @@ -1775,24 +1769,6 @@ struct MemorySanitizerVisitor : public InstVisitor { llvm_unreachable("Unknown ordering"); } - Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) { - constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1; - uint32_t OrderingTable[NumOrderings] = {}; - - OrderingTable[(int)AtomicOrderingCABI::relaxed] = - OrderingTable[(int)AtomicOrderingCABI::release] = - (int)AtomicOrderingCABI::release; - OrderingTable[(int)AtomicOrderingCABI::consume] = - OrderingTable[(int)AtomicOrderingCABI::acquire] = - OrderingTable[(int)AtomicOrderingCABI::acq_rel] = - (int)AtomicOrderingCABI::acq_rel; - OrderingTable[(int)AtomicOrderingCABI::seq_cst] = - (int)AtomicOrderingCABI::seq_cst; - - return ConstantDataVector::get(IRB.getContext(), - makeArrayRef(OrderingTable, NumOrderings)); - } - AtomicOrdering addAcquireOrdering(AtomicOrdering a) { switch (a) { case AtomicOrdering::NotAtomic: @@ -1810,24 +1786,6 @@ struct MemorySanitizerVisitor : public InstVisitor { llvm_unreachable("Unknown ordering"); } - Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) { - constexpr int NumOrderings = 
(int)AtomicOrderingCABI::seq_cst + 1; - uint32_t OrderingTable[NumOrderings] = {}; - - OrderingTable[(int)AtomicOrderingCABI::relaxed] = - OrderingTable[(int)AtomicOrderingCABI::acquire] = - OrderingTable[(int)AtomicOrderingCABI::consume] = - (int)AtomicOrderingCABI::acquire; - OrderingTable[(int)AtomicOrderingCABI::release] = - OrderingTable[(int)AtomicOrderingCABI::acq_rel] = - (int)AtomicOrderingCABI::acq_rel; - OrderingTable[(int)AtomicOrderingCABI::seq_cst] = - (int)AtomicOrderingCABI::seq_cst; - - return ConstantDataVector::get(IRB.getContext(), - makeArrayRef(OrderingTable, NumOrderings)); - } - // ------------------- Visitors. using InstVisitor::visit; void visit(Instruction &I) { @@ -3446,60 +3404,6 @@ struct MemorySanitizerVisitor : public InstVisitor { } } - void visitLibAtomicLoad(CallBase &CB) { - IRBuilder<> IRB(&CB); - Value *Size = CB.getArgOperand(0); - Value *SrcPtr = CB.getArgOperand(1); - Value *DstPtr = CB.getArgOperand(2); - Value *Ordering = CB.getArgOperand(3); - // Convert the call to have at least Acquire ordering to make sure - // the shadow operations aren't reordered before it. 
- Value *NewOrdering = - IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering); - CB.setArgOperand(3, NewOrdering); - - IRBuilder<> NextIRB(CB.getNextNode()); - NextIRB.SetCurrentDebugLocation(CB.getDebugLoc()); - - Value *SrcShadowPtr, *SrcOriginPtr; - std::tie(SrcShadowPtr, SrcOriginPtr) = - getShadowOriginPtr(SrcPtr, NextIRB, NextIRB.getInt8Ty(), Align(1), - /*isStore*/ false); - Value *DstShadowPtr = - getShadowOriginPtr(DstPtr, NextIRB, NextIRB.getInt8Ty(), Align(1), - /*isStore*/ true) - .first; - - NextIRB.CreateMemCpy(DstShadowPtr, Align(1), SrcShadowPtr, Align(1), Size); - if (MS.TrackOrigins) { - Value *SrcOrigin = NextIRB.CreateAlignedLoad(MS.OriginTy, SrcOriginPtr, - kMinOriginAlignment); - Value *NewOrigin = updateOrigin(SrcOrigin, NextIRB); - NextIRB.CreateCall(MS.MsanSetOriginFn, {DstPtr, Size, NewOrigin}); - } - } - - void visitLibAtomicStore(CallBase &CB) { - IRBuilder<> IRB(&CB); - Value *Size = CB.getArgOperand(0); - Value *DstPtr = CB.getArgOperand(2); - Value *Ordering = CB.getArgOperand(3); - // Convert the call to have at least Release ordering to make sure - // the shadow operations aren't reordered after it. - Value *NewOrdering = - IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering); - CB.setArgOperand(3, NewOrdering); - - Value *DstShadowPtr = - getShadowOriginPtr(DstPtr, IRB, IRB.getInt8Ty(), Align(1), - /*isStore*/ true) - .first; - - // Atomic store always paints clean shadow/origin. See file header. - IRB.CreateMemSet(DstShadowPtr, getCleanShadow(IRB.getInt8Ty()), Size, - Align(1)); - } - void visitCallBase(CallBase &CB) { assert(!CB.getMetadata("nosanitize")); if (CB.isInlineAsm()) { @@ -3513,23 +3417,6 @@ struct MemorySanitizerVisitor : public InstVisitor { visitInstruction(CB); return; } - LibFunc LF; - if (TLI->getLibFunc(CB, LF)) { - // libatomic.a functions need to have special handling because there isn't - // a good way to intercept them or compile the library with - // instrumentation. 
- switch (LF) { - case LibFunc_atomic_load: - visitLibAtomicLoad(CB); - return; - case LibFunc_atomic_store: - visitLibAtomicStore(CB); - return; - default: - break; - } - } - if (auto *Call = dyn_cast(&CB)) { assert(!isa(Call) && "intrinsics are handled elsewhere"); diff --git a/llvm/test/Instrumentation/MemorySanitizer/libatomic.ll b/llvm/test/Instrumentation/MemorySanitizer/libatomic.ll deleted file mode 100644 index a2515740b4591..0000000000000 --- a/llvm/test/Instrumentation/MemorySanitizer/libatomic.ll +++ /dev/null @@ -1,70 +0,0 @@ -; RUN: opt < %s -msan-check-access-address=0 -S -passes=msan 2>&1 | FileCheck %s -; RUN: opt < %s -msan-check-access-address=0 -msan-track-origins=2 -S -passes=msan 2>&1 | FileCheck %s -check-prefixes=CHECK,CHECK-ORIGIN -; RUN: opt < %s -msan -msan-check-access-address=0 -S | FileCheck %s -target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -target triple = "x86_64-unknown-linux-gnu" - -declare void @__atomic_load(i64, i8*, i8*, i32) -declare void @__atomic_store(i64, i8*, i8*, i32) - -define i24 @odd_sized_load(i24* %ptr) sanitize_memory { -; CHECK: @odd_sized_load(i24* {{.*}}[[PTR:%.+]]) -; CHECK: [[VAL_PTR:%.*]] = alloca i24, align 1 -; CHECK-ORIGIN: @__msan_set_alloca_origin -; CHECK: [[VAL_PTR_I8:%.*]] = bitcast i24* [[VAL_PTR]] to i8* -; CHECK: [[PTR_I8:%.*]] = bitcast i24* [[PTR]] to i8* -; CHECK: call void @__atomic_load(i64 3, i8* [[PTR_I8]], i8* [[VAL_PTR_I8]], i32 2) - -; CHECK: ptrtoint i8* [[PTR_I8]] -; CHECK: xor -; CHECK: [[SPTR_I8:%.*]] = inttoptr -; CHECK-ORIGIN: add -; CHECK-ORIGIN: and -; CHECK-ORIGIN: [[OPTR:%.*]] = inttoptr - -; CHECK: ptrtoint i8* [[VAL_PTR_I8]] -; CHECK: xor -; CHECK: [[VAL_SPTR_I8:%.*]] = inttoptr -; CHECK-ORIGIN: add -; CHECK-ORIGIN: and -; CHECK-ORIGIN: [[VAL_OPTR:%.*]] = inttoptr - -; CHECK: call void @llvm.memcpy{{.*}}(i8* align 1 [[VAL_SPTR_I8]], i8* align 1 [[SPTR_I8]], 
i64 3 - -; CHECK-ORIGIN: [[ARG_ORIGIN:%.*]] = load i32, i32* [[OPTR]] -; CHECK-ORIGIN: [[VAL_ORIGIN:%.*]] = call i32 @__msan_chain_origin(i32 [[ARG_ORIGIN]]) -; CHECK-ORIGIN: call void @__msan_set_origin(i8* [[VAL_PTR_I8]], i64 3, i32 [[VAL_ORIGIN]]) - -; CHECK: [[VAL:%.*]] = load i24, i24* [[VAL_PTR]] -; CHECK: ret i24 [[VAL]] - %val_ptr = alloca i24, align 1 - %val_ptr_i8 = bitcast i24* %val_ptr to i8* - %ptr_i8 = bitcast i24* %ptr to i8* - call void @__atomic_load(i64 3, i8* %ptr_i8, i8* %val_ptr_i8, i32 0) - %val = load i24, i24* %val_ptr - ret i24 %val -} - -define void @odd_sized_store(i24* %ptr, i24 %val) sanitize_memory { -; CHECK: @odd_sized_store(i24* {{.*}}[[PTR:%.+]], i24 {{.*}}[[VAL:%.+]]) -; CHECK: [[VAL_PTR:%.*]] = alloca i24, align 1 -; CHECK: store i24 [[VAL]], i24* [[VAL_PTR]] -; CHECK: [[VAL_PTR_I8:%.*]] = bitcast i24* [[VAL_PTR]] to i8* -; CHECK: [[PTR_I8:%.*]] = bitcast i24* [[PTR]] to i8* - -; CHECK: ptrtoint i8* [[PTR_I8]] -; CHECK: xor -; CHECK: [[SPTR_I8:%.*]] = inttoptr -; CHECK: call void @llvm.memset{{.*}}(i8* align 1 [[SPTR_I8]], i8 0, i64 3 -; CHECK-ORIGIN: call void @__msan_set_origin(i8* [[PTR_I8]], i64 3, i32 0) - -; CHECK: call void @__atomic_store(i64 3, i8* [[VAL_PTR_I8]], i8* [[PTR_I8]], i32 3) -; CHECK: ret void - %val_ptr = alloca i24, align 1 - store i24 %val, i24* %val_ptr - %val_ptr_i8 = bitcast i24* %val_ptr to i8* - %ptr_i8 = bitcast i24* %ptr to i8* - call void @__atomic_store(i64 3, i8* %val_ptr_i8, i8* %ptr_i8, i32 0) - ret void -} - From 65936fed1490dd1ef8d9a0a88b7d305b0840eb05 Mon Sep 17 00:00:00 2001 From: Gui Andrade Date: Sat, 18 Jul 2020 03:49:17 +0000 Subject: [PATCH 707/771] [MSAN tests] Require android for sigandorset.cpp --- compiler-rt/test/msan/Linux/sigandorset.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/compiler-rt/test/msan/Linux/sigandorset.cpp b/compiler-rt/test/msan/Linux/sigandorset.cpp index d0e552fcfa019..da983020a4c68 100644 --- a/compiler-rt/test/msan/Linux/sigandorset.cpp +++ 
b/compiler-rt/test/msan/Linux/sigandorset.cpp @@ -2,6 +2,7 @@ // RUN: %clangxx_msan -DLEFT_OK -std=c++11 -O0 -g %s -o %t && not %run %t 2>&1 | FileCheck %s // RUN: %clangxx_msan -DRIGHT_OK -std=c++11 -O0 -g %s -o %t && not %run %t 2<&1 | FileCheck %s // RUN: %clangxx_msan -DLEFT_OK -DRIGHT_OK -std=c++11 -O0 -g %s -o %t && %run %t +// REQUIRES: !android #include #include From b922004ea29d54534c4f09b9cfa655bf5f3360f0 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 17 Jul 2020 23:29:50 -0700 Subject: [PATCH 708/771] [RelocationResolver] Support R_PPC_REL32 & R_PPC64_REL{32,64} This suppresses `failed to compute relocation: R_PPC_REL32, Invalid data was encountered while parsing the file` and its 64-bit variants when running llvm-dwarfdump on a PowerPC object file with .eh_frame Unfortunately it is difficult to test the computation: DWARFDataExtractor::getEncodedPointer does not use the relocated value and even if it does, we need to teach llvm-dwarfdump --eh-frame to do some linker job to report a reasonable address. 
--- llvm/lib/Object/RelocationResolver.cpp | 20 +++++++++++-- llvm/test/DebugInfo/PowerPC/eh-frame.ll | 39 +++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) create mode 100644 llvm/test/DebugInfo/PowerPC/eh-frame.ll diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp index 93917655073fc..77df7fd0aad9d 100644 --- a/llvm/lib/Object/RelocationResolver.cpp +++ b/llvm/lib/Object/RelocationResolver.cpp @@ -152,6 +152,8 @@ static bool supportsPPC64(uint64_t Type) { switch (Type) { case ELF::R_PPC64_ADDR32: case ELF::R_PPC64_ADDR64: + case ELF::R_PPC64_REL32: + case ELF::R_PPC64_REL64: return true; default: return false; @@ -164,6 +166,10 @@ static uint64_t resolvePPC64(RelocationRef R, uint64_t S, uint64_t A) { return (S + getELFAddend(R)) & 0xFFFFFFFF; case ELF::R_PPC64_ADDR64: return S + getELFAddend(R); + case ELF::R_PPC64_REL32: + return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF; + case ELF::R_PPC64_REL64: + return S + getELFAddend(R) - R.getOffset(); default: llvm_unreachable("Invalid relocation type"); } @@ -259,12 +265,22 @@ static uint64_t resolveX86(RelocationRef R, uint64_t S, uint64_t A) { } static bool supportsPPC32(uint64_t Type) { - return Type == ELF::R_PPC_ADDR32; + switch (Type) { + case ELF::R_PPC_ADDR32: + case ELF::R_PPC_REL32: + return true; + default: + return false; + } } static uint64_t resolvePPC32(RelocationRef R, uint64_t S, uint64_t A) { - if (R.getType() == ELF::R_PPC_ADDR32) + switch (R.getType()) { + case ELF::R_PPC_ADDR32: return (S + getELFAddend(R)) & 0xFFFFFFFF; + case ELF::R_PPC_REL32: + return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF; + } llvm_unreachable("Invalid relocation type"); } diff --git a/llvm/test/DebugInfo/PowerPC/eh-frame.ll b/llvm/test/DebugInfo/PowerPC/eh-frame.ll new file mode 100644 index 0000000000000..3a8f7df6b61a9 --- /dev/null +++ b/llvm/test/DebugInfo/PowerPC/eh-frame.ll @@ -0,0 +1,39 @@ +; RUN: llc -filetype=obj -mtriple=powerpc 
%s -o %t32.o +; RUN: llvm-readobj -r %t32.o | FileCheck %s --check-prefix=PPC_REL +; RUN: llvm-dwarfdump --eh-frame %t32.o 2>&1 | FileCheck %s --check-prefix=PPC + +; PPC_REL: R_PPC_REL32 .text 0x0 +; PPC_REL-NEXT: R_PPC_REL32 .text 0x4 + +; PPC-NOT: warning: +; PPC: FDE cie=00000000 pc=00000000...00000004 +;; TODO Take relocation into consideration +; PPC: FDE cie=00000000 pc=00000000...00000004 + +; RUN: llc -filetype=obj -mtriple=ppc64 %s -o %t64.o +; RUN: llvm-readobj -r %t64.o | FileCheck %s --check-prefix=PPC64_REL +; RUN: llvm-dwarfdump --eh-frame %t64.o 2>&1 | FileCheck %s --check-prefix=PPC64 + +; PPC64_REL: R_PPC64_REL32 .text 0x0 +; PPC64_REL-NEXT: R_PPC64_REL32 .text 0x10 + +; PPC64-NOT: warning: +; PPC64: FDE cie=00000000 pc=00000000...00000010 +; PPC64: FDE cie=00000000 pc=00000000...00000010 + +; RUN: llc -filetype=obj -mtriple=ppc64le -code-model=large %s -o %t64l.o +; RUN: llvm-readobj -r %t64l.o | FileCheck %s --check-prefix=PPC64L_REL +; RUN: llvm-dwarfdump --eh-frame %t64l.o 2>&1 | FileCheck %s --check-prefix=PPC64 + +; PPC64L_REL: R_PPC64_REL64 .text 0x0 +; PPC64L_REL-NEXT: R_PPC64_REL64 .text 0x10 + +define void @foo() { +entry: + ret void +} + +define void @bar() { +entry: + ret void +} From 3073a3aa1ef1ce8c9cac9b97a8e5905dd8779e16 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Fri, 17 Jul 2020 23:49:15 -0700 Subject: [PATCH 709/771] [RelocationResolver] Support R_AARCH64_PREL32 Code from D83800 by Yichao Yu --- llvm/lib/Object/RelocationResolver.cpp | 6 ++++++ llvm/test/DebugInfo/AArch64/eh-frame.ll | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 llvm/test/DebugInfo/AArch64/eh-frame.ll diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp index 77df7fd0aad9d..919799a25c6f2 100644 --- a/llvm/lib/Object/RelocationResolver.cpp +++ b/llvm/lib/Object/RelocationResolver.cpp @@ -62,6 +62,8 @@ static bool supportsAArch64(uint64_t Type) { switch (Type) { case 
ELF::R_AARCH64_ABS32: case ELF::R_AARCH64_ABS64: + case ELF::R_AARCH64_PREL32: + case ELF::R_AARCH64_PREL64: return true; default: return false; @@ -74,6 +76,10 @@ static uint64_t resolveAArch64(RelocationRef R, uint64_t S, uint64_t A) { return (S + getELFAddend(R)) & 0xFFFFFFFF; case ELF::R_AARCH64_ABS64: return S + getELFAddend(R); + case ELF::R_AARCH64_PREL32: + return (S + getELFAddend(R) - R.getOffset()) & 0xFFFFFFFF; + case ELF::R_AARCH64_PREL64: + return S + getELFAddend(R) - R.getOffset(); default: llvm_unreachable("Invalid relocation type"); } diff --git a/llvm/test/DebugInfo/AArch64/eh-frame.ll b/llvm/test/DebugInfo/AArch64/eh-frame.ll new file mode 100644 index 0000000000000..9651159271e55 --- /dev/null +++ b/llvm/test/DebugInfo/AArch64/eh-frame.ll @@ -0,0 +1,21 @@ +; RUN: llc -filetype=obj -mtriple=aarch64 %s -o %t.o +; RUN: llvm-readobj -r %t.o | FileCheck %s --check-prefix=REL32 +; RUN: llvm-dwarfdump --eh-frame %t.o 2>&1 | FileCheck %s + +; REL32: R_AARCH64_PREL32 .text 0x0 +; REL32-NEXT: R_AARCH64_PREL32 .text 0x4 + +; CHECK-NOT: warning: +; CHECK: FDE cie=00000000 pc=00000000...00000004 +;; TODO Take relocation into consideration +; CHECK: FDE cie=00000000 pc=00000000...00000004 + +define void @foo() { +entry: + ret void +} + +define void @bar() { +entry: + ret void +} From 3382b7177f0410144d70154aee9b2031221ba838 Mon Sep 17 00:00:00 2001 From: Yash Jain Date: Sat, 18 Jul 2020 13:09:30 +0530 Subject: [PATCH 710/771] [MLIR] Add lowering for affine.parallel to scf.parallel Add lowering conversion from affine.parallel to scf.parallel. 
Differential Revision: https://reviews.llvm.org/D83239 --- .../AffineToStandard/AffineToStandard.h | 1 + .../AffineToStandard/AffineToStandard.cpp | 38 +++++++++++++ .../AffineToStandard/lower-affine.mlir | 54 +++++++++++++++++++ 3 files changed, 93 insertions(+) diff --git a/mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h b/mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h index 5d04f157b8ce8..4deffafe0ec60 100644 --- a/mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h +++ b/mlir/include/mlir/Conversion/AffineToStandard/AffineToStandard.h @@ -15,6 +15,7 @@ namespace mlir { class AffineExpr; class AffineForOp; class AffineMap; +class AffineParallelOp; class Location; struct LogicalResult; class MLIRContext; diff --git a/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp b/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp index e8032348e1821..bc48ef35fcd10 100644 --- a/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp +++ b/mlir/lib/Conversion/AffineToStandard/AffineToStandard.cpp @@ -357,6 +357,43 @@ class AffineForLowering : public OpRewritePattern { } }; +/// Convert an `affine.parallel` (loop nest) operation into a `scf.parallel` +/// operation. +class AffineParallelLowering : public OpRewritePattern { +public: + using OpRewritePattern::OpRewritePattern; + + LogicalResult matchAndRewrite(AffineParallelOp op, + PatternRewriter &rewriter) const override { + Location loc = op.getLoc(); + SmallVector steps; + SmallVector upperBoundTuple; + SmallVector lowerBoundTuple; + // Finding lower and upper bound by expanding the map expression. + // Checking if expandAffineMap is not giving NULL. 
+ Optional> upperBound = expandAffineMap( + rewriter, loc, op.upperBoundsMap(), op.getUpperBoundsOperands()); + Optional> lowerBound = expandAffineMap( + rewriter, loc, op.lowerBoundsMap(), op.getLowerBoundsOperands()); + if (!lowerBound || !upperBound) + return failure(); + upperBoundTuple = *upperBound; + lowerBoundTuple = *lowerBound; + steps.reserve(op.steps().size()); + for (Attribute step : op.steps()) + steps.push_back(rewriter.create( + loc, step.cast().getInt())); + // Creating empty scf.parallel op body with appropriate bounds. + auto parallelOp = rewriter.create(loc, lowerBoundTuple, + upperBoundTuple, steps); + rewriter.eraseBlock(parallelOp.getBody()); + rewriter.inlineRegionBefore(op.region(), parallelOp.region(), + parallelOp.region().end()); + rewriter.eraseOp(op); + return success(); + } +}; + class AffineIfLowering : public OpRewritePattern { public: using OpRewritePattern::OpRewritePattern; @@ -615,6 +652,7 @@ void mlir::populateAffineToStdConversionPatterns( AffineLoadLowering, AffineMinLowering, AffineMaxLowering, + AffineParallelLowering, AffinePrefetchLowering, AffineStoreLowering, AffineForLowering, diff --git a/mlir/test/Conversion/AffineToStandard/lower-affine.mlir b/mlir/test/Conversion/AffineToStandard/lower-affine.mlir index 2ca3ffe947fbd..7aac76b870b74 100644 --- a/mlir/test/Conversion/AffineToStandard/lower-affine.mlir +++ b/mlir/test/Conversion/AffineToStandard/lower-affine.mlir @@ -620,3 +620,57 @@ func @affine_max(%arg0: index, %arg1: index) -> index{ %0 = affine.max affine_map<(d0,d1) -> (d0 - d1, d1 - d0)>(%arg0, %arg1) return %0 : index } + +// CHECK-LABEL: func @affine_parallel( +// CHECK-SAME: %[[ARG0:.*]]: memref<100x100xf32>, %[[ARG1:.*]]: memref<100x100xf32>) { +func @affine_parallel(%o: memref<100x100xf32>, %a: memref<100x100xf32>) { + affine.parallel (%i, %j) = (0, 0) to (100, 100) { + } + return +} + +// CHECK-DAG: %[[C100:.*]] = constant 100 +// CHECK-DAG: %[[C100_1:.*]] = constant 100 +// CHECK-DAG: %[[C0:.*]] = 
constant 0 +// CHECK-DAG: %[[C0_1:.*]] = constant 0 +// CHECK-DAG: %[[C1:.*]] = constant 1 +// CHECK-DAG: %[[C1_1:.*]] = constant 1 +// CHECK-DAG: scf.parallel (%arg2, %arg3) = (%[[C0]], %[[C0_1]]) to (%[[C100]], %[[C100_1]]) step (%[[C1]], %[[C1_1]]) { + +// CHECK-LABEL: func @affine_parallel_tiled( +// CHECK-SAME: %[[ARG0:.*]]: memref<100x100xf32>, %[[ARG1:.*]]: memref<100x100xf32>, %[[ARG2:.*]]: memref<100x100xf32>) { +func @affine_parallel_tiled(%o: memref<100x100xf32>, %a: memref<100x100xf32>, %b: memref<100x100xf32>) { + affine.parallel (%i0, %j0, %k0) = (0, 0, 0) to (100, 100, 100) step (10, 10, 10) { + affine.parallel (%i1, %j1, %k1) = (%i0, %j0, %k0) to (%i0 + 10, %j0 + 10, %k0 + 10) { + %0 = affine.load %a[%i1, %k1] : memref<100x100xf32> + %1 = affine.load %b[%k1, %j1] : memref<100x100xf32> + %2 = mulf %0, %1 : f32 + } + } + return +} + +// CHECK-DAG: %[[C100:.*]] = constant 100 +// CHECK-DAG: %[[C100_0:.*]] = constant 100 +// CHECK-DAG: %[[C100_1:.*]] = constant 100 +// CHECK-DAG: %[[C0:.*]] = constant 0 +// CHECK-DAG: %[[C0_2:.*]] = constant 0 +// CHECK-DAG: %[[C0_3:.*]] = constant 0 +// CHECK-DAG: %[[C10:.*]] = constant 10 +// CHECK-DAG: %[[C10_4:.*]] = constant 10 +// CHECK-DAG: %[[C10_5:.*]] = constant 10 +// CHECK: scf.parallel (%[[arg3:.*]], %[[arg4:.*]], %[[arg5:.*]]) = (%[[C0]], %[[C0_2]], %[[C0_3]]) to (%[[C100]], %[[C100_0]], %[[C100_1]]) step (%[[C10]], %[[C10_4]], %[[C10_5]]) { +// CHECK-DAG: %[[C10_6:.*]] = constant 10 +// CHECK-DAG: %[[A0:.*]] = addi %[[arg3]], %[[C10_6]] +// CHECK-DAG: %[[C10_7:.*]] = constant 10 +// CHECK-DAG: %[[A1:.*]] = addi %[[arg4]], %[[C10_7]] +// CHECK-DAG: %[[C10_8:.*]] = constant 10 +// CHECK-DAG: %[[A2:.*]] = addi %[[arg5]], %[[C10_8]] +// CHECK-DAG: %[[C1:.*]] = constant 1 +// CHECK-DAG: %[[C1_9:.*]] = constant 1 +// CHECK-DAG: %[[C1_10:.*]] = constant 1 +// CHECK: scf.parallel (%[[arg6:.*]], %[[arg7:.*]], %[[arg8:.*]]) = (%[[arg3]], %[[arg4]], %[[arg5]]) to (%[[A0]], %[[A1]], %[[A2]]) step (%[[C1]], %[[C1_9]], 
%[[C1_10]]) { +// CHECK: %[[A3:.*]] = load %[[ARG1]][%[[arg6]], %[[arg8]]] : memref<100x100xf32> +// CHECK: %[[A4:.*]] = load %[[ARG2]][%[[arg8]], %[[arg7]]] : memref<100x100xf32> +// CHECK: mulf %[[A3]], %[[A4]] : f32 +// CHECK: scf.yield From 1fe0705cb17ab48aab0e486db1148e00e544c0d3 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Sat, 18 Jul 2020 02:16:17 -0700 Subject: [PATCH 711/771] [sanitizer] Disable test on incompatible platforms --- .../test/sanitizer_common/TestCases/Linux/signal_send.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp index 54014da8b532a..84084b9291a70 100644 --- a/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp +++ b/compiler-rt/test/sanitizer_common/TestCases/Linux/signal_send.cpp @@ -1,5 +1,8 @@ // RUN: %clangxx -std=c++11 -O0 -g %s -o %t && %run %t 2>&1 | FileCheck %s +// sigandset is glibc specific. +// UNSUPPORTED: android, freebsd, netbsd + #include #include #include From 24089928be7cb04a40975f4d037b1a1c15034614 Mon Sep 17 00:00:00 2001 From: Evgeny Leviant Date: Sat, 18 Jul 2020 14:11:40 +0300 Subject: [PATCH 712/771] [CodeGen][TargetPassConfig] Add TargetTransformInfo pass correctly Patch adds tti pass directly enforcing its execution with correctly set TargetTransformInfo. 
Differential revision: https://reviews.llvm.org/D84047 --- llvm/lib/CodeGen/TargetPassConfig.cpp | 2 +- .../AArch64/partial-pipeline-execution.ll | 87 +++++++++++++++++++ 2 files changed, 88 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/AArch64/partial-pipeline-execution.ll diff --git a/llvm/lib/CodeGen/TargetPassConfig.cpp b/llvm/lib/CodeGen/TargetPassConfig.cpp index e0fdb0cefcb8b..ffff56f545b2d 100644 --- a/llvm/lib/CodeGen/TargetPassConfig.cpp +++ b/llvm/lib/CodeGen/TargetPassConfig.cpp @@ -879,7 +879,7 @@ bool TargetPassConfig::addISelPasses() { addPass(createLowerEmuTLSPass()); addPass(createPreISelIntrinsicLoweringPass()); - addPass(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); + PM->add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis())); addIRPasses(); addCodeGenPrepare(); addPassesToHandleExceptions(); diff --git a/llvm/test/CodeGen/AArch64/partial-pipeline-execution.ll b/llvm/test/CodeGen/AArch64/partial-pipeline-execution.ll new file mode 100644 index 0000000000000..82cb0a35f2a58 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/partial-pipeline-execution.ll @@ -0,0 +1,87 @@ +; RUN: llc -O3 %s -o %t.s +; RUN: llc -O3 -stop-after=atomic-expand %s -o %t.mir +; RUN: llc -O3 -start-after=atomic-expand %s -o %t2.s + +; If we add tti pass correctly files should be identical +; Otherwise LSR will use default TargetTransformInfo and +; optimize the loop differently +; RUN: cmp %t.s %t2.s + +; ModuleID = 'loop.c' +source_filename = "loop.c" +target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" +target triple = "aarch64-none-linux-gnu" + +@q = dso_local local_unnamed_addr global i32* null, align 8 + +; Function Attrs: nofree norecurse nounwind +define dso_local i32 @main(i32 %argc, i8** nocapture readnone %argv) local_unnamed_addr #0 { +entry: + %cmp5 = icmp sgt i32 %argc, 0 + br i1 %cmp5, label %for.body.lr.ph, label %for.cond.cleanup + +for.body.lr.ph: ; preds = %entry + %0 = load i32*, i32** 
@q, align 8, !tbaa !2 + %1 = zext i32 %argc to i64 + %2 = add nsw i64 %1, -1 + %3 = lshr i64 %2, 5 + %4 = add nuw nsw i64 %3, 1 + %min.iters.check = icmp eq i64 %3, 0 + br i1 %min.iters.check, label %for.body.preheader, label %vector.ph + +for.body.preheader: ; preds = %middle.block, %for.body.lr.ph + %indvars.iv.ph = phi i64 [ 0, %for.body.lr.ph ], [ %ind.end, %middle.block ] + br label %for.body + +vector.ph: ; preds = %for.body.lr.ph + %n.vec = and i64 %4, 1152921504606846974 + %ind.end = shl i64 %n.vec, 5 + br label %vector.body + +vector.body: ; preds = %vector.body, %vector.ph + %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ] + %offset.idx = shl i64 %index, 5 + %induction7 = or i64 %offset.idx, 32 + %5 = getelementptr inbounds i32, i32* %0, i64 %offset.idx + %6 = getelementptr inbounds i32, i32* %0, i64 %induction7 + %7 = trunc i64 %offset.idx to i32 + %8 = trunc i64 %induction7 to i32 + store i32 %7, i32* %5, align 4, !tbaa !6 + store i32 %8, i32* %6, align 4, !tbaa !6 + %index.next = add i64 %index, 2 + %9 = icmp eq i64 %index.next, %n.vec + br i1 %9, label %middle.block, label %vector.body, !llvm.loop !8 + +middle.block: ; preds = %vector.body + %cmp.n = icmp eq i64 %4, %n.vec + br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader + +for.cond.cleanup: ; preds = %for.body, %middle.block, %entry + ret i32 0 + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ %indvars.iv.ph, %for.body.preheader ] + %arrayidx = getelementptr inbounds i32, i32* %0, i64 %indvars.iv + %10 = trunc i64 %indvars.iv to i32 + store i32 %10, i32* %arrayidx, align 4, !tbaa !6 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32 + %cmp = icmp ult i64 %indvars.iv.next, %1 + br i1 %cmp, label %for.body, label %for.cond.cleanup, !llvm.loop !10 +} + +attributes #0 = { nofree norecurse nounwind "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="non-leaf" 
"less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git d9943e7f0ce888733ee7ba91da432e5f01f7aa85)"} +!2 = !{!3, !3, i64 0} +!3 = !{!"any pointer", !4, i64 0} +!4 = !{!"omnipotent char", !5, i64 0} +!5 = !{!"Simple C/C++ TBAA"} +!6 = !{!7, !7, i64 0} +!7 = !{!"int", !4, i64 0} +!8 = distinct !{!8, !9} +!9 = !{!"llvm.loop.isvectorized", i32 1} +!10 = distinct !{!10, !9} From 2f4c3e80970c85078747a528b0af0fc5168e3e1a Mon Sep 17 00:00:00 2001 From: David Green Date: Fri, 17 Jul 2020 08:35:04 +0100 Subject: [PATCH 713/771] [LV] Add additional InLoop redution tests. 
NFC --- .../LoopVectorize/ARM/mve-reductions.ll | 1290 ++++++++++++++ .../LoopVectorize/reduction-inloop-uf4.ll | 68 + .../LoopVectorize/reduction-inloop.ll | 1493 +++++++---------- 3 files changed, 1970 insertions(+), 881 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll create mode 100644 llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll diff --git a/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll new file mode 100644 index 0000000000000..0d4cc31677b80 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll @@ -0,0 +1,1290 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -loop-vectorize < %s -S -o - | FileCheck %s + +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" +target triple = "thumbv8.1m.main-arm-none-eabi" + +define i64 @add_i64_i64(i64* nocapture readonly %x, i32 %n) #0 { +; CHECK-LABEL: @add_i64_i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[X:%.*]], i32 [[I_08]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ADD]] = add nsw i64 [[TMP0]], [[R_07]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ 
[[ADD]], [[FOR_BODY]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[R_0_LCSSA]] +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.07 = phi i64 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i64, i64* %x, i32 %i.08 + %0 = load i64, i64* %arrayidx, align 8 + %add = add nsw i64 %0, %r.07 + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ] + ret i64 %r.0.lcssa +} + +; FIXME: 4x +define i64 @add_i32_i64(i32* nocapture readonly %x, i32 %n) #0 { +; CHECK-LABEL: @add_i32_i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[I_08]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[TMP0]] to i64 +; CHECK-NEXT: [[ADD]] = add nsw i64 [[R_07]], [[CONV]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: 
[[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[R_0_LCSSA]] +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.07 = phi i64 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 + %0 = load i32, i32* %arrayidx, align 4 + %conv = sext i32 %0 to i64 + %add = add nsw i64 %r.07, %conv + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ] + ret i64 %r.0.lcssa +} + +; FIXME: 4x ? +define i64 @add_i16_i64(i16* nocapture readonly %x, i32 %n) #0 { +; CHECK-LABEL: @add_i16_i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[I_08]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i64 +; CHECK-NEXT: [[ADD]] = add nsw i64 [[R_07]], [[CONV]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] 
+; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[R_0_LCSSA]] +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.07 = phi i64 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.08 + %0 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %0 to i64 + %add = add nsw i64 %r.07, %conv + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ] + ret i64 %r.0.lcssa +} + +; FIXME: 4x ? 
+define i64 @add_i8_i64(i8* nocapture readonly %x, i32 %n) #0 { +; CHECK-LABEL: @add_i8_i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[I_08]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i64 +; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[R_07]], [[CONV]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[R_0_LCSSA]] +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.07 = phi i64 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.08 + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i64 + %add = add nuw nsw i64 %r.07, %conv + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i64 [ 0, %entry ], [ %add, 
%for.body ] + ret i64 %r.0.lcssa +} + +define i32 @add_i32_i32(i32* nocapture readonly %x, i32 %n) #0 { +; CHECK-LABEL: @add_i32_i32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4]] = add <4 x i32> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: 
[[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_08]] +; CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[TMP7]], [[R_07]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !2 +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[R_0_LCSSA]] +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.07 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.08 + %0 = load i32, i32* %arrayidx, align 4 + %add = add nsw i32 %0, %r.07 + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %r.0.lcssa +} + +; FIXME: 8x +define i32 @add_i16_i32(i16* nocapture readonly %x, i32 %n) #0 { +; CHECK-LABEL: @add_i16_i32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br 
i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] 
= phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X]], i32 [[I_08]] +; CHECK-NEXT: [[TMP8:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP8]] to i32 +; CHECK-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !5 +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[R_0_LCSSA]] +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.07 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.08 + %0 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %0 to i32 + %add = add nsw i32 %r.07, %conv + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %r.0.lcssa +} + +; FIXME: 16x +define i32 @add_i8_i32(i8* nocapture readonly %x, i32 %n) #0 { +; CHECK-LABEL: @add_i8_i32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: 
for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[VEC_PHI]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: 
[[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_08]] +; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP8]] to i32 +; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[R_07]], [[CONV]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !7 +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[R_0_LCSSA]] +; +entry: + %cmp6 = icmp sgt i32 %n, 0 + br i1 %cmp6, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.08 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.07 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.08 + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %add = add nuw nsw i32 %r.07, %conv + %inc = add nuw nsw i32 %i.08, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %r.0.lcssa +} + +define signext i16 @add_i16_i16(i16* nocapture readonly %x, i32 %n) #0 { +; CHECK-LABEL: @add_i16_i16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 +; CHECK-NEXT: br i1 
[[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <8 x i16>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[TMP4]] = add <8 x i16> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP6:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_09:%.*]] = phi i16 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X]], 
i32 [[I_010]] +; CHECK-NEXT: [[TMP7:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ADD]] = add i16 [[TMP7]], [[R_09]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_010]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !9 +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i16 [[R_0_LCSSA]] +; +entry: + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.09 = phi i16 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.010 + %0 = load i16, i16* %arrayidx, align 2 + %add = add i16 %0, %r.09 + %inc = add nuw nsw i32 %i.010, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] + ret i16 %r.0.lcssa +} + +; FIXME: 16x ? 
+define signext i16 @add_i8_i16(i8* nocapture readonly %x, i32 %n) #0 { +; CHECK-LABEL: @add_i8_i16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <8 x i8>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i16> +; CHECK-NEXT: [[TMP5]] = add <8 x i16> [[VEC_PHI]], [[TMP4]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP7:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP5]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; 
CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_09:%.*]] = phi i16 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_010]] +; CHECK-NEXT: [[TMP8:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP8]] to i16 +; CHECK-NEXT: [[ADD]] = add i16 [[R_09]], [[CONV]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_010]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !11 +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i16 [[R_0_LCSSA]] +; +entry: + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.09 = phi i16 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.010 + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i16 + %add = add i16 %r.09, %conv + %inc = add nuw nsw i32 %i.010, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] + ret i16 %r.0.lcssa +} + +define zeroext i8 @add_i8_i8(i8* nocapture readonly %x, i32 %n) #0 { +; CHECK-LABEL: @add_i8_i8( +; CHECK-NEXT: 
entry: +; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 16 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <16 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4]] = add <16 x i8> [[WIDE_LOAD]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP6:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP4]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: 
[[I_09:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_08:%.*]] = phi i8 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_09]] +; CHECK-NEXT: [[TMP7:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ADD]] = add i8 [[TMP7]], [[R_08]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_09]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !13 +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i8 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i8 [[R_0_LCSSA]] +; +entry: + %cmp7 = icmp sgt i32 %n, 0 + br i1 %cmp7, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.09 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.08 = phi i8 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.09 + %0 = load i8, i8* %arrayidx, align 1 + %add = add i8 %0, %r.08 + %inc = add nuw nsw i32 %i.09, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i8 [ 0, %entry ], [ %add, %for.body ] + ret i8 %r.0.lcssa +} + +define i64 @mla_i64_i64(i64* nocapture readonly %x, i64* nocapture readonly %y, i32 %n) #0 { +; CHECK-LABEL: @mla_i64_i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; 
CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[R_09:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i64, i64* [[X:%.*]], i32 [[I_010]] +; CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[ARRAYIDX]], align 8 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i64, i64* [[Y:%.*]], i32 [[I_010]] +; CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[ARRAYIDX1]], align 8 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[ADD]] = add nsw i64 [[MUL]], [[R_09]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_010]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[R_0_LCSSA]] +; +entry: + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.09 = phi i64 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i64, i64* %x, i32 %i.010 + %0 = load i64, i64* %arrayidx, align 8 + %arrayidx1 = getelementptr inbounds i64, i64* %y, i32 %i.010 + %1 = load i64, i64* %arrayidx1, align 8 + %mul = mul nsw i64 %1, %0 + %add = add nsw i64 %mul, %r.09 + %inc = add nuw nsw i32 %i.010, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ] + ret i64 %r.0.lcssa +} + +define i64 @mla_i32_i64(i32* nocapture readonly %x, 
i32* nocapture readonly %y, i32 %n) #0 { +; CHECK-LABEL: @mla_i32_i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[R_09:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[I_010]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i32 [[I_010]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[CONV:%.*]] = sext i32 [[MUL]] to i64 +; CHECK-NEXT: [[ADD]] = add nsw i64 [[R_09]], [[CONV]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_010]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[R_0_LCSSA]] +; +entry: + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.09 = phi i64 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.010 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %y, i32 %i.010 + %1 = load i32, i32* 
%arrayidx1, align 4 + %mul = mul nsw i32 %1, %0 + %conv = sext i32 %mul to i64 + %add = add nsw i64 %r.09, %conv + %inc = add nuw nsw i32 %i.010, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ] + ret i64 %r.0.lcssa +} + +define i64 @mla_i16_i64(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) #0 { +; CHECK-LABEL: @mla_i16_i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[R_011:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[I_012]] +; CHECK-NEXT: [[TMP0:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP0]] to i32 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i32 [[I_012]] +; CHECK-NEXT: [[TMP1:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP1]] to i32 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]] +; CHECK-NEXT: [[CONV3:%.*]] = sext i32 [[MUL]] to i64 +; CHECK-NEXT: [[ADD]] = add nsw i64 [[R_011]], [[CONV3]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_012]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi 
i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[R_0_LCSSA]] +; +entry: + %cmp10 = icmp sgt i32 %n, 0 + br i1 %cmp10, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.012 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.011 = phi i64 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.012 + %0 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %0 to i32 + %arrayidx1 = getelementptr inbounds i16, i16* %y, i32 %i.012 + %1 = load i16, i16* %arrayidx1, align 2 + %conv2 = sext i16 %1 to i32 + %mul = mul nsw i32 %conv2, %conv + %conv3 = sext i32 %mul to i64 + %add = add nsw i64 %r.011, %conv3 + %inc = add nuw nsw i32 %i.012, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ] + ret i64 %r.0.lcssa +} + +define i64 @mla_i8_i64(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 { +; CHECK-LABEL: @mla_i8_i64( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[R_011:%.*]] = phi i64 [ [[ADD:%.*]], [[FOR_BODY]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[I_012]] +; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[I_012]] +; CHECK-NEXT: [[TMP1:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP1]] to i32 
+; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV2]], [[CONV]] +; CHECK-NEXT: [[CONV3:%.*]] = zext i32 [[MUL]] to i64 +; CHECK-NEXT: [[ADD]] = add nuw nsw i64 [[R_011]], [[CONV3]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_012]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i64 [ [[ADD]], [[FOR_BODY]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i64 [[R_0_LCSSA]] +; +entry: + %cmp10 = icmp sgt i32 %n, 0 + br i1 %cmp10, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.012 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.011 = phi i64 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.012 + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds i8, i8* %y, i32 %i.012 + %1 = load i8, i8* %arrayidx1, align 1 + %conv2 = zext i8 %1 to i32 + %mul = mul nuw nsw i32 %conv2, %conv + %conv3 = zext i32 %mul to i64 + %add = add nuw nsw i64 %r.011, %conv3 + %inc = add nuw nsw i32 %i.012, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i64 [ 0, %entry ], [ %add, %for.body ] + ret i64 %r.0.lcssa +} + +define i32 @mla_i32_i32(i32* nocapture readonly %x, i32* nocapture readonly %y, i32 %n) #0 { +; CHECK-LABEL: @mla_i32_i32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP8]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; 
CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[Y:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 +; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[TMP7]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !14 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 
0, [[FOR_BODY_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_010:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_09:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[I_010]] +; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[Y]], i32 [[I_010]] +; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[R_09]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_010]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !15 +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[R_0_LCSSA]] +; +entry: + %cmp8 = icmp sgt i32 %n, 0 + br i1 %cmp8, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.010 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.09 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, i32* %x, i32 %i.010 + %0 = load i32, i32* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds i32, i32* %y, i32 %i.010 + %1 = load i32, i32* %arrayidx1, align 4 + %mul = mul nsw i32 %1, %0 + %add = add nsw i32 %mul, %r.09 + %inc = add nuw nsw i32 %i.010, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + 
+for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %r.0.lcssa +} + +define i32 @mla_i16_i32(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) #0 { +; CHECK-LABEL: @mla_i16_i32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2 +; CHECK-NEXT: [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP4]] +; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[TMP9]], [[VEC_PHI]] +; CHECK-NEXT: 
[[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !16 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_010:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X]], i32 [[I_011]] +; CHECK-NEXT: [[TMP13:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[CONV:%.*]] = sext i16 [[TMP13]] to i32 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[Y]], i32 [[I_011]] +; CHECK-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[CONV2:%.*]] = sext i16 [[TMP14]] to i32 +; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[CONV2]], [[CONV]] +; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[R_010]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_011]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !17 +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: 
[[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[R_0_LCSSA]] +; +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.010 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.011 + %0 = load i16, i16* %arrayidx, align 2 + %conv = sext i16 %0 to i32 + %arrayidx1 = getelementptr inbounds i16, i16* %y, i32 %i.011 + %1 = load i16, i16* %arrayidx1, align 2 + %conv2 = sext i16 %1 to i32 + %mul = mul nsw i32 %conv2, %conv + %add = add nsw i32 %mul, %r.010 + %inc = add nuw nsw i32 %i.011, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %r.0.lcssa +} + +define i32 @mla_i8_i32(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 { +; CHECK-LABEL: @mla_i8_i32( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP9]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], 
i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <4 x i8>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <4 x i8>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP7]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = zext <4 x i8> [[WIDE_LOAD1]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = mul nuw nsw <4 x i32> [[TMP8]], [[TMP4]] +; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[TMP9]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !18 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP10]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_011:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_010:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_011]] +; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: 
[[CONV:%.*]] = zext i8 [[TMP13]] to i32 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[Y]], i32 [[I_011]] +; CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP14]] to i32 +; CHECK-NEXT: [[MUL:%.*]] = mul nuw nsw i32 [[CONV2]], [[CONV]] +; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[MUL]], [[R_010]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_011]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !19 +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i32 [[R_0_LCSSA]] +; +entry: + %cmp9 = icmp sgt i32 %n, 0 + br i1 %cmp9, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.010 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.011 + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i32 + %arrayidx1 = getelementptr inbounds i8, i8* %y, i32 %i.011 + %1 = load i8, i8* %arrayidx1, align 1 + %conv2 = zext i8 %1 to i32 + %mul = mul nuw nsw i32 %conv2, %conv + %add = add nuw nsw i32 %mul, %r.010 + %inc = add nuw nsw i32 %i.011, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ] + ret i32 %r.0.lcssa +} + +define signext i16 @mla_i16_i16(i16* nocapture readonly %x, i16* nocapture readonly %y, i32 %n) #0 { +; CHECK-LABEL: @mla_i16_i16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 
[[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i16* [[TMP2]] to <8 x i16>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP3]], align 2 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[Y:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <8 x i16>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP6]], align 2 +; CHECK-NEXT: [[TMP7:%.*]] = mul <8 x i16> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8]] = add <8 x i16> [[TMP7]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !20 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label 
[[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_013:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_012:%.*]] = phi i16 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[X]], i32 [[I_013]] +; CHECK-NEXT: [[TMP11:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i16, i16* [[Y]], i32 [[I_013]] +; CHECK-NEXT: [[TMP12:%.*]] = load i16, i16* [[ARRAYIDX1]], align 2 +; CHECK-NEXT: [[MUL:%.*]] = mul i16 [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[ADD]] = add i16 [[MUL]], [[R_012]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_013]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !21 +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i16 [[R_0_LCSSA]] +; +entry: + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.012 = phi i16 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i16, i16* %x, i32 %i.013 + %0 = load i16, i16* %arrayidx, align 2 + %arrayidx1 = getelementptr inbounds i16, i16* %y, i32 %i.013 + %1 = 
load i16, i16* %arrayidx1, align 2 + %mul = mul i16 %1, %0 + %add = add i16 %mul, %r.012 + %inc = add nuw nsw i32 %i.013, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] + ret i16 %r.0.lcssa +} + +define signext i16 @mla_i8_i16(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 { +; CHECK-LABEL: @mla_i8_i16( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 8 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 8 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <8 x i8>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = zext <8 x i8> [[WIDE_LOAD]] to <8 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8, i8* [[TMP5]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to <8 x i8>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <8 x i8>, <8 x i8>* [[TMP7]], align 1 +; 
CHECK-NEXT: [[TMP8:%.*]] = zext <8 x i8> [[WIDE_LOAD1]] to <8 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = mul nuw <8 x i16> [[TMP8]], [[TMP4]] +; CHECK-NEXT: [[TMP10]] = add <8 x i16> [[TMP9]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 8 +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !22 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP12:%.*]] = call i16 @llvm.experimental.vector.reduce.add.v8i16(<8 x i16> [[TMP10]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i16 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_013:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_012:%.*]] = phi i16 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_013]] +; CHECK-NEXT: [[TMP13:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP13]] to i16 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[Y]], i32 [[I_013]] +; CHECK-NEXT: [[TMP14:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[CONV2:%.*]] = zext i8 [[TMP14]] to i16 +; CHECK-NEXT: [[MUL:%.*]] = mul nuw i16 [[CONV2]], [[CONV]] +; CHECK-NEXT: [[ADD]] = add i16 [[MUL]], [[R_012]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_013]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !23 +; CHECK: 
for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i16 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i16 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i16 [[R_0_LCSSA]] +; +entry: + %cmp11 = icmp sgt i32 %n, 0 + br i1 %cmp11, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.013 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.012 = phi i16 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.013 + %0 = load i8, i8* %arrayidx, align 1 + %conv = zext i8 %0 to i16 + %arrayidx1 = getelementptr inbounds i8, i8* %y, i32 %i.013 + %1 = load i8, i8* %arrayidx1, align 1 + %conv2 = zext i8 %1 to i16 + %mul = mul nuw i16 %conv2, %conv + %add = add i16 %mul, %r.012 + %inc = add nuw nsw i32 %i.013, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i16 [ 0, %entry ], [ %add, %for.body ] + ret i16 %r.0.lcssa +} + +define zeroext i8 @mla_i8_i8(i8* nocapture readonly %x, i8* nocapture readonly %y, i32 %n) #0 { +; CHECK-LABEL: @mla_i8_i8( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP10]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.body.preheader: +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N]], 16 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], 16 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: 
[[VEC_PHI:%.*]] = phi <16 x i8> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[INDEX]], 0 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[X:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to <16 x i8>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, <16 x i8>* [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[Y:%.*]], i32 [[TMP0]] +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, i8* [[TMP4]], i32 0 +; CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to <16 x i8>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP6]], align 1 +; CHECK-NEXT: [[TMP7:%.*]] = mul <16 x i8> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP8]] = add <16 x i8> [[TMP7]], [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 16 +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !24 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP10:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v16i8(<16 x i8> [[TMP8]]) +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i8 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[I_012:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[R_011:%.*]] = phi i8 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i8, i8* [[X]], i32 [[I_012]] 
+; CHECK-NEXT: [[TMP11:%.*]] = load i8, i8* [[ARRAYIDX]], align 1 +; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i8, i8* [[Y]], i32 [[I_012]] +; CHECK-NEXT: [[TMP12:%.*]] = load i8, i8* [[ARRAYIDX1]], align 1 +; CHECK-NEXT: [[MUL:%.*]] = mul i8 [[TMP12]], [[TMP11]] +; CHECK-NEXT: [[ADD]] = add i8 [[MUL]], [[R_011]] +; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[I_012]], 1 +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !25 +; CHECK: for.cond.cleanup.loopexit: +; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i8 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[FOR_COND_CLEANUP]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: [[R_0_LCSSA:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; CHECK-NEXT: ret i8 [[R_0_LCSSA]] +; +entry: + %cmp10 = icmp sgt i32 %n, 0 + br i1 %cmp10, label %for.body, label %for.cond.cleanup + +for.body: ; preds = %entry, %for.body + %i.012 = phi i32 [ %inc, %for.body ], [ 0, %entry ] + %r.011 = phi i8 [ %add, %for.body ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i8, i8* %x, i32 %i.012 + %0 = load i8, i8* %arrayidx, align 1 + %arrayidx1 = getelementptr inbounds i8, i8* %y, i32 %i.012 + %1 = load i8, i8* %arrayidx1, align 1 + %mul = mul i8 %1, %0 + %add = add i8 %mul, %r.011 + %inc = add nuw nsw i32 %i.012, 1 + %exitcond = icmp eq i32 %inc, %n + br i1 %exitcond, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: ; preds = %for.body, %entry + %r.0.lcssa = phi i8 [ 0, %entry ], [ %add, %for.body ] + ret i8 %r.0.lcssa +} + +attributes #0 = { "target-features"="+mve" } diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll new file mode 100644 index 0000000000000..013e08de44f08 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll @@ -0,0 +1,68 @@ 
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -loop-vectorize -force-vector-interleave=4 -force-vector-width=4 -force-reduction-intrinsics -dce -instcombine -S | FileCheck %s + +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" + +define i32 @reduction_sum_single(i32* noalias nocapture %A) { +; CHECK-LABEL: @reduction_sum_single( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4 +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 8 +; CHECK-NEXT: [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds 
i32, i32* [[TMP0]], i64 12 +; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4 +; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP9]] = add <4 x i32> [[VEC_PHI1]], [[WIDE_LOAD4]] +; CHECK-NEXT: [[TMP10]] = add <4 x i32> [[VEC_PHI2]], [[WIDE_LOAD5]] +; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[VEC_PHI3]], [[WIDE_LOAD6]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 16 +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK: middle.block: +; CHECK-NEXT: [[BIN_RDX:%.*]] = add <4 x i32> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[BIN_RDX7:%.*]] = add <4 x i32> [[TMP10]], [[BIN_RDX]] +; CHECK-NEXT: [[BIN_RDX8:%.*]] = add <4 x i32> [[TMP11]], [[BIN_RDX7]] +; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX8]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !2 +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l7 = add i32 %sum.02, %l3 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ] + ret 
i32 %sum.0.lcssa +} diff --git a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll index 0886b8eca2ef3..a6747fac8055e 100644 --- a/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll +++ b/llvm/test/Transforms/LoopVectorize/reduction-inloop.ll @@ -3,1020 +3,785 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128" -define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { +define i32 @reduction_sum_single(i32* noalias nocapture %A) { +; CHECK-LABEL: @reduction_sum_single( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label 
[[DOTLR_PH]], !llvm.loop !2 +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l7 = add i32 %sum.02, %l3 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +define i32 @reduction_sum(i32* noalias nocapture %A, i32* noalias nocapture %B) { ; CHECK-LABEL: @reduction_sum( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] -; CHECK: .lr.ph.preheader: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; 
CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND2]] -; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[TMP10]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND2]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP6]] = add <4 x i32> [[TMP5]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP13:%.*]] = call i32 
@llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP21:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[SUM_02]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], [[TMP15]] -; CHECK-NEXT: [[TMP21]] = add i32 [[TMP20]], [[TMP17]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !2 -; CHECK: ._crit_edge.loopexit: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] +; CHECK-NEXT: br i1 undef, label 
[[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !5 ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; - %1 = icmp sgt i32 %n, 0 - br i1 %1, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %0, %.lr.ph - %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] - %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] - %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv - %5 = load i32, i32* %4, align 4 - %6 = trunc i64 %indvars.iv to i32 - %7 = add i32 %sum.02, %6 - %8 = add i32 %7, %3 - %9 = add i32 %8, %5 +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %l5 = load i32, i32* %l4, align 4 + %l6 = trunc i64 %indvars.iv to i32 + %l7 = add i32 %sum.02, %l6 + %l8 = add i32 %l7, %l3 + %l9 = add i32 %l8, %l5 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %._crit_edge, label %.lr.ph -._crit_edge: ; preds = %.lr.ph, %0 - %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] ret i32 %sum.0.lcssa } -define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { +define i32 @reduction_sum_const(i32* noalias nocapture %A) { +; CHECK-LABEL: @reduction_sum_const( +; CHECK-NEXT: 
entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = add <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = add <4 x i32> [[TMP2]], +; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !7 +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l7 = add i32 %sum.02, %l3 + %l9 = add i32 %l7, 3 + %indvars.iv.next = add i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label 
%._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] + ret i32 %sum.0.lcssa +} + +define i32 @reduction_prod(i32* noalias nocapture %A, i32* noalias nocapture %B) { ; CHECK-LABEL: @reduction_prod( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] -; CHECK: .lr.ph.preheader: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[VEC_PHI]], [[VEC_IND2]] -; CHECK-NEXT: [[TMP10:%.*]] = mul <4 x i32> [[TMP9]], 
[[WIDE_LOAD]] -; CHECK-NEXT: [[TMP11]] = mul <4 x i32> [[TMP10]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], [[VEC_IND2]] +; CHECK-NEXT: [[TMP5:%.*]] = mul <4 x i32> [[TMP4]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP6]] = mul <4 x i32> [[TMP5]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP11]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 1, [[DOTLR_PH_PREHEADER]] ] ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 
[[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[PROD_02:%.*]] = phi i32 [ [[TMP21:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[PROD_02]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], [[TMP15]] -; CHECK-NEXT: [[TMP21]] = mul i32 [[TMP20]], [[TMP17]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !5 -; CHECK: ._crit_edge.loopexit: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !9 ; CHECK: ._crit_edge: -; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ 1, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] +; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] ; - %1 = icmp sgt i32 %n, 0 - br i1 %1, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %0, %.lr.ph - %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] - %prod.02 = phi i32 [ %9, %.lr.ph ], [ 1, %0 ] - %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv - %5 = load i32, i32* %4, align 4 - %6 
= trunc i64 %indvars.iv to i32 - %7 = mul i32 %prod.02, %6 - %8 = mul i32 %7, %3 - %9 = mul i32 %8, %5 +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %prod.02 = phi i32 [ %l9, %.lr.ph ], [ 1, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %l5 = load i32, i32* %l4, align 4 + %l6 = trunc i64 %indvars.iv to i32 + %l7 = mul i32 %prod.02, %l6 + %l8 = mul i32 %l7, %l3 + %l9 = mul i32 %l8, %l5 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %._crit_edge, label %.lr.ph -._crit_edge: ; preds = %.lr.ph, %0 - %prod.0.lcssa = phi i32 [ 1, %0 ], [ %9, %.lr.ph ] +._crit_edge: ; preds = %.lr.ph + %prod.0.lcssa = phi i32 [ %l9, %.lr.ph ] ret i32 %prod.0.lcssa } -define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { +define i32 @reduction_mix(i32* noalias nocapture %A, i32* noalias nocapture %B) { ; CHECK-LABEL: @reduction_mix( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] -; CHECK: .lr.ph.preheader: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: 
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND2]] -; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[TMP10]], [[TMP9]] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND2]] +; CHECK-NEXT: [[TMP6]] = add <4 x i32> [[TMP5]], [[TMP4]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: 
br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP21:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i32 [[TMP17]], [[TMP15]] -; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[SUM_02]], [[TMP19]] -; CHECK-NEXT: [[TMP21]] = add i32 [[TMP20]], [[TMP18]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; 
CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !7 -; CHECK: ._crit_edge.loopexit: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !11 ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; - %1 = icmp sgt i32 %n, 0 - br i1 %1, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %0, %.lr.ph - %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] - %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] - %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv - %5 = load i32, i32* %4, align 4 - %6 = mul nsw i32 %5, %3 - %7 = trunc i64 %indvars.iv to i32 - %8 = add i32 %sum.02, %7 - %9 = add i32 %8, %6 +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %l5 = load i32, i32* %l4, align 4 + %l6 = mul nsw i32 %l5, %l3 + %l7 = trunc i64 %indvars.iv to i32 + %l8 = add i32 %sum.02, %l7 + %l9 = add i32 %l8, %l6 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %._crit_edge, label %.lr.ph -._crit_edge: ; preds = %.lr.ph, %0 - %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] +._crit_edge: ; preds = 
%.lr.ph + %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] ret i32 %sum.0.lcssa } -define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { +define i32 @reduction_mul(i32* noalias nocapture %A, i32* noalias nocapture %B) { ; CHECK-LABEL: @reduction_mul( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] -; CHECK: .lr.ph.preheader: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP10]] = mul <4 x i32> [[TMP9]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = 
getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = mul <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = mul <4 x i32> [[TMP4]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP10]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 19, [[DOTLR_PH_PREHEADER]] ] ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP18:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, 
i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[SUM_02]], [[TMP14]] -; CHECK-NEXT: [[TMP18]] = mul i32 [[TMP17]], [[TMP16]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !9 -; CHECK: ._crit_edge.loopexit: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP18]], [[DOTLR_PH]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !13 ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; - %1 = icmp sgt i32 %n, 0 - br i1 %1, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %0, %.lr.ph - %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] - %sum.02 = phi i32 [ %7, %.lr.ph ], [ 19, %0 ] - %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv - %5 = load i32, i32* %4, align 4 - %6 = mul i32 %sum.02, %3 - %7 = mul i32 %6, %5 +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l7, %.lr.ph ], [ 19, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l4 = getelementptr inbounds i32, i32* %B, 
i64 %indvars.iv + %l5 = load i32, i32* %l4, align 4 + %l6 = mul i32 %sum.02, %l3 + %l7 = mul i32 %l6, %l5 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %._crit_edge, label %.lr.ph -._crit_edge: ; preds = %.lr.ph, %0 - %sum.0.lcssa = phi i32 [ 0, %0 ], [ %7, %.lr.ph ] +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l7, %.lr.ph ] ret i32 %sum.0.lcssa } -define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp { +define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out) { ; CHECK-LABEL: @start_at_non_zero( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = 
getelementptr inbounds i32, i32* [[COEFF:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[TMP7]], [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[COEFF:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = add <4 x i32> [[TMP4]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !14 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, 
[[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 120, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_09:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[COEFF]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], [[TMP11]] -; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[SUM_09]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !11 -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !15 ; CHECK: for.end: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 120, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; entry: - %cmp7 = icmp sgt i32 %n, 0 - br i1 %cmp7, label %for.body, label %for.end + br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ] %arrayidx = getelementptr inbounds i32, 
i32* %in, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %l0 = load i32, i32* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds i32, i32* %coeff, i64 %indvars.iv - %1 = load i32, i32* %arrayidx2, align 4 - %mul = mul nsw i32 %1, %0 + %l1 = load i32, i32* %arrayidx2, align 4 + %mul = mul nsw i32 %l1, %l0 %add = add nsw i32 %mul, %sum.09 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry - %sum.0.lcssa = phi i32 [ 120, %entry ], [ %add, %for.body ] + %sum.0.lcssa = phi i32 [ %add, %for.body ] ret i32 %sum.0.lcssa } -define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +define i32 @reduction_and(i32* nocapture %A, i32* nocapture %B) { ; CHECK-LABEL: @reduction_and( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] 
= bitcast i32* [[TMP3]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8]] = and <4 x i32> [[TMP7]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = and <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = and <4 x i32> [[TMP4]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !16 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP8]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], 
label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ -1, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[AND:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = and i32 [[RESULT_08]], [[TMP11]] -; CHECK-NEXT: [[AND]] = and i32 [[ADD]], [[TMP12]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !13 -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !17 ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[AND_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: - %cmp7 = icmp sgt i32 %n, 0 - br i1 %cmp7, label %for.body, label %for.end + br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ 
%indvars.iv.next, %for.body ], [ 0, %entry ] %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ] %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %l0 = load i32, i32* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv - %1 = load i32, i32* %arrayidx2, align 4 - %add = and i32 %result.08, %0 - %and = and i32 %add, %1 + %l1 = load i32, i32* %arrayidx2, align 4 + %add = and i32 %result.08, %l0 + %and = and i32 %add, %l1 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry - %result.0.lcssa = phi i32 [ -1, %entry ], [ %and, %for.body ] + %result.0.lcssa = phi i32 [ %and, %for.body ] ret i32 %result.0.lcssa } -define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +define i32 @reduction_or(i32* nocapture %A, i32* nocapture %B) { ; CHECK-LABEL: @reduction_or( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ 
zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8]] = or <4 x i32> [[TMP7]], [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = or <4 x i32> [[TMP4]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !14 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !18 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[TMP8]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], 
label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[OR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -; CHECK-NEXT: [[OR]] = or i32 [[ADD]], [[RESULT_08]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !15 -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !19 ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OR_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 
[[RESULT_0_LCSSA]] ; entry: - %cmp7 = icmp sgt i32 %n, 0 - br i1 %cmp7, label %for.body, label %for.end + br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %l0 = load i32, i32* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv - %1 = load i32, i32* %arrayidx2, align 4 - %add = add nsw i32 %1, %0 + %l1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %l1, %l0 %or = or i32 %add, %result.08 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry - %result.0.lcssa = phi i32 [ 0, %entry ], [ %or, %for.body ] + %result.0.lcssa = phi i32 [ %or, %for.body ] ret i32 %result.0.lcssa } -define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +define i32 @reduction_xor(i32* nocapture %A, i32* nocapture %B) { ; CHECK-LABEL: @reduction_xor( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: 
vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8]] = xor <4 x i32> [[TMP7]], [[VEC_PHI]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = xor <4 x i32> [[TMP4]], [[VEC_PHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !16 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !20 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call 
i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP8]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP5]]) +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[XOR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] -; CHECK-NEXT: [[XOR]] = xor i32 [[ADD]], [[RESULT_08]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !17 -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !21 ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], 
[ [[XOR_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: - %cmp7 = icmp sgt i32 %n, 0 - br i1 %cmp7, label %for.body, label %for.end + br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 + %l0 = load i32, i32* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv - %1 = load i32, i32* %arrayidx2, align 4 - %add = add nsw i32 %1, %0 + %l1 = load i32, i32* %arrayidx2, align 4 + %add = add nsw i32 %l1, %l0 %xor = xor i32 %add, %result.08 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry - %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ] + %result.0.lcssa = phi i32 [ %xor, %for.body ] ret i32 %result.0.lcssa } -define float @reduction_fadd(i32 %n, float* nocapture %A, float* nocapture %B) nounwind uwtable readonly { +define float @reduction_fadd(float* nocapture %A, float* nocapture %B) { ; CHECK-LABEL: @reduction_fadd( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 false, 
label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8]] = fadd fast <4 x float> [[TMP7]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = fadd fast <4 x float> [[TMP4]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !18 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !22 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP8]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[FADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[RESULT_08]], [[TMP11]] -; CHECK-NEXT: [[FADD]] = fadd fast float [[ADD]], [[TMP12]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], 
label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !19 -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[FADD_LCSSA:%.*]] = phi float [ [[FADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !23 ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[FADD_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] ; entry: - %cmp7 = icmp sgt i32 %n, 0 - br i1 %cmp7, label %for.body, label %for.end + br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ] %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 + %l0 = load float, float* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv - %1 = load float, float* %arrayidx2, align 4 - %add = fadd fast float %result.08, %0 - %fadd = fadd fast float %add, %1 + %l1 = load float, float* %arrayidx2, align 4 + %add = fadd fast float %result.08, %l0 + %fadd = fadd fast float %add, %l1 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry - %result.0.lcssa = phi float [ 0.0, %entry ], [ %fadd, %for.body ] + %result.0.lcssa = phi float [ %fadd, %for.body ] ret float %result.0.lcssa } -define float @reduction_fmul(i32 %n, float* nocapture %A, float* nocapture %B) nounwind uwtable readonly { +define float @reduction_fmul(float* nocapture %A, float* nocapture %B) { ; CHECK-LABEL: @reduction_fmul( ; CHECK-NEXT: 
entry: -; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP8]] = fmul fast <4 x float> [[TMP7]], [[WIDE_LOAD1]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ , [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds float, float* [[B:%.*]], 
i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = fmul fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP5]] = fmul fast <4 x float> [[TMP4]], [[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !20 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !24 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[TMP8]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP7:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[TMP5]]) +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[FMUL:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, 
float* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX2]], align 4 -; CHECK-NEXT: [[ADD:%.*]] = fmul fast float [[RESULT_08]], [[TMP11]] -; CHECK-NEXT: [[FMUL]] = fmul fast float [[ADD]], [[TMP12]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !21 -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[FMUL_LCSSA:%.*]] = phi float [ [[FMUL]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !25 ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[FMUL_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ undef, [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] ; entry: - %cmp7 = icmp sgt i32 %n, 0 - br i1 %cmp7, label %for.body, label %for.end + br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ] %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 + %l0 = load float, float* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv - %1 = load float, float* %arrayidx2, align 4 - %add = fmul fast float %result.08, %0 - %fmul = fmul fast float %add, %1 + %l1 = load float, float* %arrayidx2, align 4 + %add = fmul fast float %result.08, %l0 + %fmul = fmul fast float %add, %l1 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 
%lftr.wideiv, 256 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry - %result.0.lcssa = phi float [ 0.0, %entry ], [ %fmul, %for.body ] + %result.0.lcssa = phi float [ %fmul, %for.body ] ret float %result.0.lcssa } -define i32 @reduction_min(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +define i32 @reduction_min(i32* nocapture %A, i32* nocapture %B) { ; CHECK-LABEL: @reduction_min( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: 
[[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !22 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !26 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP6]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 1000, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[RESULT_08]], [[TMP9]] -; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[TMP9]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = 
add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !23 -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[V0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !27 ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[V0_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: - %cmp7 = icmp sgt i32 %n, 0 - br i1 %cmp7, label %for.body, label %for.end + br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ] %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 - %c0 = icmp slt i32 %result.08, %0 - %v0 = select i1 %c0, i32 %result.08, i32 %0 + %l0 = load i32, i32* %arrayidx, align 4 + %c0 = icmp slt i32 %result.08, %l0 + %v0 = select i1 %c0, i32 %result.08, i32 %l0 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry - %result.0.lcssa = phi i32 [ 0, %entry ], [ %v0, %for.body ] + %result.0.lcssa = phi i32 [ %v0, %for.body ] ret i32 %result.0.lcssa } -define i32 @reduction_max(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { +define i32 @reduction_max(i32* nocapture %A, i32* nocapture %B) { ; CHECK-LABEL: @reduction_max( ; CHECK-NEXT: entry: -; 
CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP2]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP7]], label 
[[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !24 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !28 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP6]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP5:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP3]]) +; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 1000, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[C0:%.*]] = icmp ugt i32 [[RESULT_08]], [[TMP9]] -; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[TMP9]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !25 -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[V0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 undef, label 
[[FOR_END]], label [[FOR_BODY]], !llvm.loop !29 ; CHECK: for.end: -; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[V0_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] ; entry: - %cmp7 = icmp sgt i32 %n, 0 - br i1 %cmp7, label %for.body, label %for.end + br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ] %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 - %c0 = icmp ugt i32 %result.08, %0 - %v0 = select i1 %c0, i32 %result.08, i32 %0 + %l0 = load i32, i32* %arrayidx, align 4 + %c0 = icmp ugt i32 %result.08, %l0 + %v0 = select i1 %c0, i32 %result.08, i32 %l0 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry - %result.0.lcssa = phi i32 [ 0, %entry ], [ %v0, %for.body ] + %result.0.lcssa = phi i32 [ %v0, %for.body ] ret i32 %result.0.lcssa } ; Sub we can create a reduction, but not inloop -define i32 @reduction_sub_lhs(i32 %n, i32* noalias nocapture %A) nounwind uwtable readonly { +define i32 @reduction_sub_lhs(i32* noalias nocapture %A) { ; CHECK-LABEL: @reduction_sub_lhs( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] -; CHECK: for.body.preheader: -; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 -; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 -; CHECK-NEXT: br i1 
[[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 -; CHECK-NEXT: [[TMP5]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2]] = sub <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !26 +; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !30 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP7:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP5]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP4:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP2]]) +; CHECK-NEXT: br i1 
true, label [[FOR_END:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -; CHECK-NEXT: [[SUB]] = sub nsw i32 [[X_05]], [[TMP8]] -; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 -; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !27 -; CHECK: for.end.loopexit: -; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[FOR_END]] +; CHECK-NEXT: br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop !31 ; CHECK: for.end: -; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SUB_LCSSA]], [[FOR_END_LOOPEXIT]] ] +; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[X_0_LCSSA]] ; entry: - %cmp4 = icmp sgt i32 %n, 0 - br i1 %cmp4, label %for.body, label %for.end + br label %for.body for.body: ; preds = %entry, %for.body %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ] %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %0 = load i32, i32* %arrayidx, align 4 - %sub = sub nsw i32 %x.05, 
%0 + %l0 = load i32, i32* %arrayidx, align 4 + %sub = sub nsw i32 %x.05, %l0 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %for.end, label %for.body for.end: ; preds = %for.body, %entry - %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ] + %x.0.lcssa = phi i32 [ %sub, %for.body ] ret i32 %x.0.lcssa } @@ -1051,7 +816,7 @@ define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) { ; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP13]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 ; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128 -; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !28 +; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !32 ; CHECK: middle.block: ; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]]) ; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] @@ -1068,7 +833,7 @@ define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) { ; CHECK: if.then16: ; CHECK-NEXT: br label [[FOR_INC]] ; CHECK: for.inc: -; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop !29 +; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop !33 ; CHECK: for.end: ; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ undef, [[FOR_INC]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret float [[SUM_1_LCSSA]] @@ -1080,26 +845,26 @@ for.body: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ] %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ] %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv - %0 = load float, float* %arrayidx, align 4 + %l0 = load 
float, float* %arrayidx, align 4 %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv - %1 = load float, float* %arrayidx2, align 4 - %cmp3 = fcmp ogt float %0, %1 + %l1 = load float, float* %arrayidx2, align 4 + %cmp3 = fcmp ogt float %l0, %l1 br i1 %cmp3, label %if.then, label %for.inc if.then: - %cmp6 = fcmp ogt float %1, 1.000000e+00 + %cmp6 = fcmp ogt float %l1, 1.000000e+00 br i1 %cmp6, label %if.then8, label %if.else if.then8: - %add = fadd fast float %sum.033, %0 + %add = fadd fast float %sum.033, %l0 br label %for.inc if.else: - %cmp14 = fcmp ogt float %0, 2.000000e+00 + %cmp14 = fcmp ogt float %l0, 2.000000e+00 br i1 %cmp14, label %if.then16, label %for.inc if.then16: - %add19 = fadd fast float %sum.033, %1 + %add19 = fadd fast float %sum.033, %l1 br label %for.inc for.inc: @@ -1114,245 +879,211 @@ for.end: ret float %sum.1.lcssa } -define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) { +define i32 @reduction_sum_multiuse(i32* noalias nocapture %A, i32* noalias nocapture %B) { ; CHECK-LABEL: @reduction_sum_multiuse( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[END:%.*]] -; CHECK: .lr.ph.preheader: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 -; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 -; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 -; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] -; CHECK: vector.ph: -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 -; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x 
i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 -; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* -; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 -; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND2]] -; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[WIDE_LOAD]] -; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[TMP10]], [[WIDE_LOAD1]] -; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], -; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !30 -; CHECK: middle.block: -; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) -; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] -; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] -; CHECK: scalar.ph: -; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] -; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] +; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: -; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP21:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], 
align 4 -; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] -; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 -; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[INDVARS_IV]] to i32 -; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[SUM_02]], [[TMP18]] -; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], [[TMP15]] -; CHECK-NEXT: [[TMP21]] = add i32 [[TMP20]], [[TMP17]] +; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ 0, [[ENTRY:%.*]] ] +; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[L10:%.*]], [[DOTLR_PH]] ], [ 0, [[ENTRY]] ] +; CHECK-NEXT: [[L2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDVARS_IV]] +; CHECK-NEXT: [[L3:%.*]] = load i32, i32* [[L2]], align 4 +; CHECK-NEXT: [[L6:%.*]] = trunc i64 [[INDVARS_IV]] to i32 +; CHECK-NEXT: [[L7:%.*]] = add i32 [[SUM_02]], [[L6]] +; CHECK-NEXT: [[L8:%.*]] = add i32 [[L7]], [[L3]] +; CHECK-NEXT: [[L10]] = add i32 [[L8]], [[SUM_02]] ; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 ; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 -; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] -; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !31 -; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: [[SUM_COPY:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[END]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], 256 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[END:%.*]], label [[DOTLR_PH]] ; CHECK: end: -; CHECK-NEXT: [[F1:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[SUM_LCSSA]], [[DOT_CRIT_EDGE]] ] -; CHECK-NEXT: [[F2:%.*]] = phi i32 [ 0, [[TMP0]] ], [ [[SUM_COPY]], [[DOT_CRIT_EDGE]] ] -; CHECK-NEXT: [[FINAL:%.*]] = add i32 [[F1]], [[F2]] -; CHECK-NEXT: ret i32 [[FINAL]] +; CHECK-NEXT: ret i32 [[L10]] ; - %1 = icmp 
sgt i32 %n, 0 - br i1 %1, label %.lr.ph.preheader, label %end -.lr.ph.preheader: ; preds = %0 +entry: br label %.lr.ph -.lr.ph: ; preds = %0, %.lr.ph - %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ] - %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %.lr.ph.preheader ] - %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv - %5 = load i32, i32* %4, align 4 - %6 = trunc i64 %indvars.iv to i32 - %7 = add i32 %sum.02, %6 - %8 = add i32 %7, %3 - %9 = add i32 %8, %5 +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l10, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %l5 = load i32, i32* %l4, align 4 + %l6 = trunc i64 %indvars.iv to i32 + %l7 = add i32 %sum.02, %l6 + %l8 = add i32 %l7, %l3 + %l9 = add i32 %l8, %l5 + %l10 = add i32 %l8, %sum.02 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n - br i1 %exitcond, label %._crit_edge, label %.lr.ph - -._crit_edge: ; preds = %.lr.ph, %0 - %sum.lcssa = phi i32 [ %9, %.lr.ph ] - %sum.copy = phi i32 [ %9, %.lr.ph ] - br label %end + %exitcond = icmp eq i32 %lftr.wideiv, 256 + br i1 %exitcond, label %end, label %.lr.ph end: - %f1 = phi i32 [ 0, %0 ], [ %sum.lcssa, %._crit_edge ] - %f2 = phi i32 [ 0, %0 ], [ %sum.copy, %._crit_edge ] - %final = add i32 %f1, %f2 - ret i32 %final + %f1 = phi i32 [ %l10, %.lr.ph ] + ret i32 %f1 } ; Predicated loop, cannot (yet) use in-loop reductions. 
-define i32 @reduction_predicated(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { +define i32 @reduction_predicated(i32* noalias nocapture %A, i32* noalias nocapture %B) { ; CHECK-LABEL: @reduction_predicated( -; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 -; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] -; CHECK: .lr.ph.preheader: -; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 -; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 +; CHECK-NEXT: entry: ; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] ; CHECK: vector.ph: -; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4 -; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588 -; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TMP3]], i32 0 -; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] -; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[PRED_LOAD_CONTINUE14]] ] -; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 1 -; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 2 -; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 3 -; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]] -; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] -; CHECK: pred.load.if: -; 
CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4 -; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> undef, i32 [[TMP10]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] -; CHECK: pred.load.continue: -; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_LOAD_IF]] ] -; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] -; CHECK: pred.load.if1: -; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 -; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] -; CHECK: pred.load.continue2: -; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], [[PRED_LOAD_IF1]] ] -; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 -; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] -; CHECK: pred.load.if3: -; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i32 2 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] -; CHECK: pred.load.continue4: -; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], [[PRED_LOAD_IF3]] ] -; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 -; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] -; CHECK: pred.load.if5: -; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 
4 -; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP25]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] -; CHECK: pred.load.continue6: -; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i32> [ [[TMP22]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ] -; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0 -; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] -; CHECK: pred.load.if7: -; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> undef, i32 [[TMP30]], i32 0 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] -; CHECK: pred.load.continue8: -; CHECK-NEXT: [[TMP32:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP31]], [[PRED_LOAD_IF7]] ] -; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1 -; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]] -; CHECK: pred.load.if9: -; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]] -; CHECK-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP35]], i32 1 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] -; CHECK: pred.load.continue10: -; CHECK-NEXT: [[TMP37:%.*]] = phi <4 x i32> [ [[TMP32]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ] -; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2 -; CHECK-NEXT: br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] -; CHECK: pred.load.if11: -; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP5]] -; CHECK-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 -; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP40]], i32 2 -; CHECK-NEXT: br label 
[[PRED_LOAD_CONTINUE12]] -; CHECK: pred.load.continue12: -; CHECK-NEXT: [[TMP42:%.*]] = phi <4 x i32> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP41]], [[PRED_LOAD_IF11]] ] -; CHECK-NEXT: [[TMP43:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3 -; CHECK-NEXT: br i1 [[TMP43]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.if13: -; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP6]] -; CHECK-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 -; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP45]], i32 3 -; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] -; CHECK: pred.load.continue14: -; CHECK-NEXT: [[TMP47:%.*]] = phi <4 x i32> [ [[TMP42]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP46]], [[PRED_LOAD_IF13]] ] -; CHECK-NEXT: [[TMP48:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND15]] -; CHECK-NEXT: [[TMP49:%.*]] = add <4 x i32> [[TMP48]], [[TMP27]] -; CHECK-NEXT: [[TMP50]] = add <4 x i32> [[TMP49]], [[TMP47]] +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>* +; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4 +; CHECK-NEXT: [[TMP4:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND2]] +; CHECK-NEXT: [[TMP5:%.*]] = add <4 x i32> [[TMP4]], [[WIDE_LOAD]] +; CHECK-NEXT: [[TMP6]] = add <4 x i32> [[TMP5]], 
[[WIDE_LOAD1]] ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 -; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], -; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], -; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] -; CHECK-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !32 +; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], +; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !34 ; CHECK: middle.block: -; CHECK-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP50]], <4 x i32> [[VEC_PHI]] -; CHECK-NEXT: [[TMP53:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP52]]) -; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP6]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] ; CHECK: scalar.ph: ; CHECK-NEXT: br label [[DOTLR_PH:%.*]] ; CHECK: .lr.ph: -; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !33 -; CHECK: ._crit_edge.loopexit: -; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP53]], [[MIDDLE_BLOCK]] ] -; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !35 ; CHECK: ._crit_edge: -; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] ; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] ; - %1 = icmp sgt i32 %n, 0 - br i1 %1, label %.lr.ph, label %._crit_edge - -.lr.ph: ; preds = %0, %.lr.ph - %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] - %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] - 
%2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv - %3 = load i32, i32* %2, align 4 - %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv - %5 = load i32, i32* %4, align 4 - %6 = trunc i64 %indvars.iv to i32 - %7 = add i32 %sum.02, %6 - %8 = add i32 %7, %3 - %9 = add i32 %8, %5 +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02 = phi i32 [ %l9, %.lr.ph ], [ 0, %entry ] + %l2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv + %l3 = load i32, i32* %l2, align 4 + %l4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv + %l5 = load i32, i32* %l4, align 4 + %l6 = trunc i64 %indvars.iv to i32 + %l7 = add i32 %sum.02, %l6 + %l8 = add i32 %l7, %l3 + %l9 = add i32 %l8, %l5 %indvars.iv.next = add i64 %indvars.iv, 1 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 - %exitcond = icmp eq i32 %lftr.wideiv, %n + %exitcond = icmp eq i32 %lftr.wideiv, 256 br i1 %exitcond, label %._crit_edge, label %.lr.ph, !llvm.loop !6 -._crit_edge: ; preds = %.lr.ph, %0 - %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] ret i32 %sum.0.lcssa } +define i8 @reduction_add_trunc(i8* noalias nocapture %A) { +; CHECK-LABEL: @reduction_add_trunc( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i8> [ , [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i8>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 4 +; 
CHECK-NEXT: [[TMP3]] = add <4 x i8> [[VEC_PHI]], [[WIDE_LOAD]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !36 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP5:%.*]] = call i8 @llvm.experimental.vector.reduce.add.v4i8(<4 x i8> [[TMP3]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !37 +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i8 [ undef, [[DOTLR_PH]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i8 [[SUM_0_LCSSA]] +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ] + %sum.02 = and i32 %sum.02p, 255 + %l2 = getelementptr inbounds i8, i8* %A, i32 %indvars.iv + %l3 = load i8, i8* %l2, align 4 + %l3e = zext i8 %l3 to i32 + %l9 = add i32 %sum.02, %l3e + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 256 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] + %ret = trunc i32 %sum.0.lcssa to i8 + ret i8 %ret +} + + +define i8 @reduction_and_trunc(i8* noalias nocapture %A) { +; CHECK-LABEL: @reduction_and_trunc( +; CHECK-NEXT: entry: +; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = sext i32 [[INDEX]] to 
i64 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[A:%.*]], i64 [[TMP0]] +; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <4 x i8>* +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, <4 x i8>* [[TMP2]], align 4 +; CHECK-NEXT: [[TMP3:%.*]] = zext <4 x i8> [[WIDE_LOAD]] to <4 x i32> +; CHECK-NEXT: [[TMP4]] = and <4 x i32> [[VEC_PHI]], [[TMP3]] +; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256 +; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !38 +; CHECK: middle.block: +; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP4]]) +; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]] +; CHECK: scalar.ph: +; CHECK-NEXT: br label [[DOTLR_PH:%.*]] +; CHECK: .lr.ph: +; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !39 +; CHECK: ._crit_edge: +; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ] +; CHECK-NEXT: [[RET:%.*]] = trunc i32 [[SUM_0_LCSSA]] to i8 +; CHECK-NEXT: ret i8 [[RET]] +; +entry: + br label %.lr.ph + +.lr.ph: ; preds = %entry, %.lr.ph + %indvars.iv = phi i32 [ %indvars.iv.next, %.lr.ph ], [ 0, %entry ] + %sum.02p = phi i32 [ %l9, %.lr.ph ], [ 255, %entry ] + %sum.02 = and i32 %sum.02p, 255 + %l2 = getelementptr inbounds i8, i8* %A, i32 %indvars.iv + %l3 = load i8, i8* %l2, align 4 + %l3e = zext i8 %l3 to i32 + %l9 = and i32 %sum.02, %l3e + %indvars.iv.next = add i32 %indvars.iv, 1 + %exitcond = icmp eq i32 %indvars.iv.next, 256 + br i1 %exitcond, label %._crit_edge, label %.lr.ph + +._crit_edge: ; preds = %.lr.ph + %sum.0.lcssa = phi i32 [ %l9, %.lr.ph ] + %ret = trunc i32 %sum.0.lcssa to i8 + ret i8 %ret +} + !6 = distinct !{!6, !7, !8} !7 = !{!"llvm.loop.vectorize.predicate.enable", i1 true} !8 = !{!"llvm.loop.vectorize.enable", i1 true} From 918f3fc2c7cf8116b04bfc00125af53214671fc6 Mon 
Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 18 Jul 2020 10:08:17 -0400 Subject: [PATCH 714/771] AMDGPU/GlobalISel: Fix test copy paste error --- .../AMDGPU/GlobalISel/legalize-fmaxnum.mir | 322 +++++++++--------- 1 file changed, 161 insertions(+), 161 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir index f6456cd57f01e..8b9b0e972e6f4 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-fmaxnum.mir @@ -4,7 +4,7 @@ # RUN: llc -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -run-pass=legalizer %s -o - | FileCheck -check-prefix=GFX9 %s --- -name: test_fminnum_s32_ieee_mode_on +name: test_fmaxnum_s32_ieee_mode_on machineFunctionInfo: mode: ieee: true @@ -12,35 +12,35 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s32_ieee_mode_on + ; SI-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; VI-LABEL: name: test_fminnum_s32_ieee_mode_on + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_on + ; VI: 
[[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_on ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 + %2:_(s32) = G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... --- -name: test_fminnum_s32_ieee_mode_off +name: test_fmaxnum_s32_ieee_mode_off machineFunctionInfo: mode: ieee: false @@ -48,280 +48,280 @@ body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s32_ieee_mode_off + ; SI-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM]](s32) - ; VI-LABEL: name: test_fminnum_s32_ieee_mode_off + ; SI: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] + ; SI: $vgpr0 = COPY [[FMAXNUM]](s32) + ; VI-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM]](s32) - ; GFX9-LABEL: name: test_fminnum_s32_ieee_mode_off + ; VI: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] + ; VI: $vgpr0 = COPY [[FMAXNUM]](s32) + ; GFX9-LABEL: name: test_fmaxnum_s32_ieee_mode_off ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: 
[[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FMINNUM:%[0-9]+]]:_(s32) = G_FMINNUM [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM]](s32) + ; GFX9: [[FMAXNUM:%[0-9]+]]:_(s32) = G_FMAXNUM [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 + %2:_(s32) = G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... --- -name: test_fminnum_s32_nnan +name: test_fmaxnum_s32_nnan body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s32_nnan + ; SI-LABEL: name: test_fmaxnum_s32_nnan ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; VI-LABEL: name: test_fminnum_s32_nnan + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-LABEL: name: test_fmaxnum_s32_nnan ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; GFX9-LABEL: name: test_fminnum_s32_nnan + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-LABEL: name: test_fmaxnum_s32_nnan ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = nnan G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 - %2:_(s32) = nnan G_FMINNUM %0, %1 + %2:_(s32) = nnan G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... 
--- -name: test_fminnum_s32_nnan_lhs +name: test_fmaxnum_s32_nnan_lhs body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s32_nnan_lhs + ; SI-LABEL: name: test_fmaxnum_s32_nnan_lhs ; SI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; VI-LABEL: name: test_fminnum_s32_nnan_lhs + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs ; VI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[FCANONICALIZE]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[FCANONICALIZE]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = nnan COPY $vgpr0 %1:_(s32) = COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 + %2:_(s32) = G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... 
--- -name: test_fminnum_s32_nnan_rhs +name: test_fmaxnum_s32_nnan_rhs body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s32_nnan_rhs + ; SI-LABEL: name: test_fmaxnum_s32_nnan_rhs ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; VI-LABEL: name: test_fminnum_s32_nnan_rhs + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-LABEL: name: test_fmaxnum_s32_nnan_rhs ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; GFX9-LABEL: name: test_fminnum_s32_nnan_rhs + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[COPY]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = nnan COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 + %2:_(s32) = G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... 
--- -name: test_fminnum_s32_nnan_lhs_rhs +name: test_fmaxnum_s32_nnan_lhs_rhs body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s32_nnan_lhs_rhs + ; SI-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; SI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; SI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; VI-LABEL: name: test_fminnum_s32_nnan_lhs_rhs + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; SI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; VI-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; VI: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; VI: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) - ; GFX9-LABEL: name: test_fminnum_s32_nnan_lhs_rhs + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; VI: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) + ; GFX9-LABEL: name: test_fmaxnum_s32_nnan_lhs_rhs ; GFX9: [[COPY:%[0-9]+]]:_(s32) = nnan COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = nnan COPY $vgpr1 - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[COPY]], [[COPY1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](s32) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[COPY]], [[COPY1]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](s32) %0:_(s32) = nnan COPY $vgpr0 %1:_(s32) = nnan COPY $vgpr1 - %2:_(s32) = G_FMINNUM %0, %1 + %2:_(s32) = G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... 
--- -name: test_fminnum_s64 +name: test_fmaxnum_s64 body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; SI-LABEL: name: test_fminnum_s64 + ; SI-LABEL: name: test_fmaxnum_s64 ; SI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; SI: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) - ; VI-LABEL: name: test_fminnum_s64 + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; VI-LABEL: name: test_fmaxnum_s64 ; VI: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) - ; GFX9-LABEL: name: test_fminnum_s64 + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) + ; GFX9-LABEL: name: test_fmaxnum_s64 ; GFX9: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s64) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s64) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0_vgpr1 = COPY [[FMINNUM_IEEE]](s64) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s64) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: $vgpr0_vgpr1 = COPY [[FMAXNUM_IEEE]](s64) %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s64) = COPY $vgpr2_vgpr3 - %2:_(s64) = 
G_FMINNUM %0, %1 + %2:_(s64) = G_FMAXNUM %0, %1 $vgpr0_vgpr1 = COPY %2 ... --- -name: test_fminnum_s16 +name: test_fmaxnum_s16 body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_s16 + ; SI-LABEL: name: test_fmaxnum_s16 ; SI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; SI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; SI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) ; SI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FPTRUNC]](s16) ; SI: $vgpr0 = COPY [[ANYEXT]](s32) - ; VI-LABEL: name: test_fminnum_s16 + ; VI-LABEL: name: test_fmaxnum_s16 ; VI: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; VI: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; VI: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC [[COPY1]](s32) ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) ; VI: $vgpr0 = COPY [[ANYEXT]](s32) - ; GFX9-LABEL: name: test_fminnum_s16 + ; GFX9-LABEL: name: test_fmaxnum_s16 ; GFX9: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 ; GFX9: [[TRUNC:%[0-9]+]]:_(s16) = G_TRUNC [[COPY]](s32) ; GFX9: [[TRUNC1:%[0-9]+]]:_(s16) = G_TRUNC 
[[COPY1]](s32) ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMINNUM_IEEE]](s16) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[FMAXNUM_IEEE]](s16) ; GFX9: $vgpr0 = COPY [[ANYEXT]](s32) %0:_(s32) = COPY $vgpr0 %1:_(s32) = COPY $vgpr1 %2:_(s16) = G_TRUNC %0 %3:_(s16) = G_TRUNC %1 - %4:_(s16) = G_FMINNUM %2, %3 + %4:_(s16) = G_FMAXNUM %2, %3 %5:_(s32) = G_ANYEXT %4 $vgpr0 = COPY %5 ... --- -name: test_fminnum_v2s32 +name: test_fmaxnum_v2s32 body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; SI-LABEL: name: test_fminnum_v2s32 + ; SI-LABEL: name: test_fmaxnum_v2s32 ; SI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; SI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; SI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] ; SI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; SI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] ; SI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) + ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; SI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 
x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) ; SI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; VI-LABEL: name: test_fminnum_v2s32 + ; VI-LABEL: name: test_fmaxnum_v2s32 ; VI: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; VI: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) + ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) ; VI: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) - ; GFX9-LABEL: name: test_fminnum_v2s32 + ; GFX9-LABEL: name: test_fmaxnum_v2s32 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s32>) = COPY $vgpr2_vgpr3 ; GFX9: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY]](<2 x s32>) ; GFX9: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[COPY1]](<2 x s32>) ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE 
[[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV1]] ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(s32) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMINNUM_IEEE]](s32), [[FMINNUM_IEEE1]](s32) + ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9: [[BUILD_VECTOR:%[0-9]+]]:_(<2 x s32>) = G_BUILD_VECTOR [[FMAXNUM_IEEE]](s32), [[FMAXNUM_IEEE1]](s32) ; GFX9: $vgpr0_vgpr1 = COPY [[BUILD_VECTOR]](<2 x s32>) %0:_(<2 x s32>) = COPY $vgpr0_vgpr1 %1:_(<2 x s32>) = COPY $vgpr2_vgpr3 - %2:_(<2 x s32>) = G_FMINNUM %0, %1 + %2:_(<2 x s32>) = G_FMAXNUM %0, %1 $vgpr0_vgpr1 = COPY %2 ... --- -name: test_fminnum_v2s16 +name: test_fmaxnum_v2s16 body: | bb.0: liveins: $vgpr0, $vgpr1 - ; SI-LABEL: name: test_fminnum_v2s16 + ; SI-LABEL: name: test_fmaxnum_v2s16 ; SI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; SI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; SI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -335,19 +335,19 @@ body: | ; SI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC 
[[FMINNUM_IEEE1]](s32) + ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; SI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; SI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; SI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; VI-LABEL: name: test_fminnum_v2s16 + ; VI-LABEL: name: test_fmaxnum_v2s16 ; VI: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; VI: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; VI: [[BITCAST:%[0-9]+]]:_(s32) = G_BITCAST [[COPY]](<2 x s16>) @@ -361,36 +361,36 @@ body: | ; VI: [[TRUNC3:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR1]](s32) ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI: [[BITCAST2:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) ; VI: $vgpr0 = COPY [[BITCAST2]](<2 x s16>) - ; GFX9-LABEL: name: 
test_fminnum_v2s16 + ; GFX9-LABEL: name: test_fmaxnum_v2s16 ; GFX9: [[COPY:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr0 ; GFX9: [[COPY1:%[0-9]+]]:_(<2 x s16>) = COPY $vgpr1 ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[COPY1]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] - ; GFX9: $vgpr0 = COPY [[FMINNUM_IEEE]](<2 x s16>) + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: $vgpr0 = COPY [[FMAXNUM_IEEE]](<2 x s16>) %0:_(<2 x s16>) = COPY $vgpr0 %1:_(<2 x s16>) = COPY $vgpr1 - %2:_(<2 x s16>) = G_FMINNUM %0, %1 + %2:_(<2 x s16>) = G_FMAXNUM %0, %1 $vgpr0 = COPY %2 ... --- -name: test_fminnum_v3s16 +name: test_fmaxnum_v3s16 body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; SI-LABEL: name: test_fminnum_v3s16 + ; SI-LABEL: name: test_fmaxnum_v3s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 @@ -417,16 +417,16 @@ body: | ; SI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) + ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], 
[[FPEXT3]] + ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) + ; SI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) @@ -441,7 +441,7 @@ body: | ; SI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; SI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT2]](<3 x s16>), 0 ; SI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) - ; VI-LABEL: name: test_fminnum_v3s16 + ; VI-LABEL: name: test_fmaxnum_v3s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 @@ -468,19 +468,19 @@ body: | ; VI: [[LSHR3:%[0-9]+]]:_(s32) = G_LSHR [[BITCAST3]], [[C]](s32) ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE 
[[TRUNC2]] ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) ; VI: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[C1]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] @@ -489,7 +489,7 @@ body: | ; VI: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; VI: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT2]](<3 x s16>), 0 ; VI: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) - ; GFX9-LABEL: name: test_fminnum_v3s16 + ; GFX9-LABEL: name: test_fmaxnum_v3s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9: [[EXTRACT:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[COPY]](<4 x s16>), 0 @@ -501,11 +501,11 @@ body: | ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[INSERT1]](<4 x s16>) ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], 
[[FCANONICALIZE1]] ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>) + ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMAXNUM_IEEE]](<2 x s16>), [[FMAXNUM_IEEE1]](<2 x s16>) ; GFX9: [[EXTRACT2:%[0-9]+]]:_(<3 x s16>) = G_EXTRACT [[CONCAT_VECTORS]](<4 x s16>), 0 ; GFX9: [[INSERT2:%[0-9]+]]:_(<4 x s16>) = G_INSERT [[DEF]], [[EXTRACT2]](<3 x s16>), 0 ; GFX9: $vgpr0_vgpr1 = COPY [[INSERT2]](<4 x s16>) @@ -513,19 +513,19 @@ body: | %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 %2:_(<3 x s16>) = G_EXTRACT %0, 0 %3:_(<3 x s16>) = G_EXTRACT %1, 0 - %4:_(<3 x s16>) = G_FMINNUM %2, %3 + %4:_(<3 x s16>) = G_FMAXNUM %2, %3 %5:_(<4 x s16>) = G_IMPLICIT_DEF %6:_(<4 x s16>) = G_INSERT %5, %4, 0 $vgpr0_vgpr1 = COPY %6 ... 
--- -name: test_fminnum_v4s16 +name: test_fmaxnum_v4s16 body: | bb.0: liveins: $vgpr0_vgpr1, $vgpr2_vgpr3 - ; SI-LABEL: name: test_fminnum_v4s16 + ; SI-LABEL: name: test_fmaxnum_v4s16 ; SI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; SI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; SI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -549,20 +549,20 @@ body: | ; SI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; SI: [[FPEXT:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC]](s16) ; SI: [[FPEXT1:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC4]](s16) - ; SI: [[FMINNUM_IEEE:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT]], [[FPEXT1]] - ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE]](s32) + ; SI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT]], [[FPEXT1]] + ; SI: [[FPTRUNC:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE]](s32) ; SI: [[FPEXT2:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC1]](s16) ; SI: [[FPEXT3:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC5]](s16) - ; SI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT2]], [[FPEXT3]] - ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE1]](s32) + ; SI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT2]], [[FPEXT3]] + ; SI: [[FPTRUNC1:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE1]](s32) ; SI: [[FPEXT4:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC2]](s16) ; SI: [[FPEXT5:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC6]](s16) - ; SI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT4]], [[FPEXT5]] - ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMINNUM_IEEE2]](s32) + ; SI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT4]], [[FPEXT5]] + ; SI: [[FPTRUNC2:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE2]](s32) ; SI: [[FPEXT6:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC3]](s16) ; SI: [[FPEXT7:%[0-9]+]]:_(s32) = G_FPEXT [[TRUNC7]](s16) - ; SI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMINNUM_IEEE [[FPEXT6]], [[FPEXT7]] - ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC 
[[FMINNUM_IEEE3]](s32) + ; SI: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s32) = G_FMAXNUM_IEEE [[FPEXT6]], [[FPEXT7]] + ; SI: [[FPTRUNC3:%[0-9]+]]:_(s16) = G_FPTRUNC [[FMAXNUM_IEEE3]](s32) ; SI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC]](s16) ; SI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FPTRUNC1]](s16) ; SI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) @@ -575,7 +575,7 @@ body: | ; SI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; SI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; SI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; VI-LABEL: name: test_fminnum_v4s16 + ; VI-LABEL: name: test_fmaxnum_v4s16 ; VI: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; VI: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; VI: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) @@ -599,43 +599,43 @@ body: | ; VI: [[TRUNC7:%[0-9]+]]:_(s16) = G_TRUNC [[LSHR3]](s32) ; VI: [[FCANONICALIZE:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC]] ; VI: [[FCANONICALIZE1:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC4]] - ; VI: [[FMINNUM_IEEE:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] + ; VI: [[FMAXNUM_IEEE:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; VI: [[FCANONICALIZE2:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC1]] ; VI: [[FCANONICALIZE3:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC5]] - ; VI: [[FMINNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; VI: [[FMAXNUM_IEEE1:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] ; VI: [[FCANONICALIZE4:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC2]] ; VI: [[FCANONICALIZE5:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC6]] - ; VI: [[FMINNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] + ; VI: [[FMAXNUM_IEEE2:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE4]], [[FCANONICALIZE5]] ; VI: 
[[FCANONICALIZE6:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC3]] ; VI: [[FCANONICALIZE7:%[0-9]+]]:_(s16) = G_FCANONICALIZE [[TRUNC7]] - ; VI: [[FMINNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMINNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] - ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE]](s16) - ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE1]](s16) + ; VI: [[FMAXNUM_IEEE3:%[0-9]+]]:_(s16) = G_FMAXNUM_IEEE [[FCANONICALIZE6]], [[FCANONICALIZE7]] + ; VI: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE]](s16) + ; VI: [[ZEXT1:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE1]](s16) ; VI: [[SHL:%[0-9]+]]:_(s32) = G_SHL [[ZEXT1]], [[C]](s32) ; VI: [[OR:%[0-9]+]]:_(s32) = G_OR [[ZEXT]], [[SHL]] ; VI: [[BITCAST4:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR]](s32) - ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE2]](s16) - ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMINNUM_IEEE3]](s16) + ; VI: [[ZEXT2:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE2]](s16) + ; VI: [[ZEXT3:%[0-9]+]]:_(s32) = G_ZEXT [[FMAXNUM_IEEE3]](s16) ; VI: [[SHL1:%[0-9]+]]:_(s32) = G_SHL [[ZEXT3]], [[C]](s32) ; VI: [[OR1:%[0-9]+]]:_(s32) = G_OR [[ZEXT2]], [[SHL1]] ; VI: [[BITCAST5:%[0-9]+]]:_(<2 x s16>) = G_BITCAST [[OR1]](s32) ; VI: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[BITCAST4]](<2 x s16>), [[BITCAST5]](<2 x s16>) ; VI: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) - ; GFX9-LABEL: name: test_fminnum_v4s16 + ; GFX9-LABEL: name: test_fmaxnum_v4s16 ; GFX9: [[COPY:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr0_vgpr1 ; GFX9: [[COPY1:%[0-9]+]]:_(<4 x s16>) = COPY $vgpr2_vgpr3 ; GFX9: [[UV:%[0-9]+]]:_(<2 x s16>), [[UV1:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY]](<4 x s16>) ; GFX9: [[UV2:%[0-9]+]]:_(<2 x s16>), [[UV3:%[0-9]+]]:_(<2 x s16>) = G_UNMERGE_VALUES [[COPY1]](<4 x s16>) ; GFX9: [[FCANONICALIZE:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV]] ; GFX9: [[FCANONICALIZE1:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV2]] - ; GFX9: [[FMINNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE 
[[FCANONICALIZE]], [[FCANONICALIZE1]] + ; GFX9: [[FMAXNUM_IEEE:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE]], [[FCANONICALIZE1]] ; GFX9: [[FCANONICALIZE2:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV1]] ; GFX9: [[FCANONICALIZE3:%[0-9]+]]:_(<2 x s16>) = G_FCANONICALIZE [[UV3]] - ; GFX9: [[FMINNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMINNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] - ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMINNUM_IEEE]](<2 x s16>), [[FMINNUM_IEEE1]](<2 x s16>) + ; GFX9: [[FMAXNUM_IEEE1:%[0-9]+]]:_(<2 x s16>) = G_FMAXNUM_IEEE [[FCANONICALIZE2]], [[FCANONICALIZE3]] + ; GFX9: [[CONCAT_VECTORS:%[0-9]+]]:_(<4 x s16>) = G_CONCAT_VECTORS [[FMAXNUM_IEEE]](<2 x s16>), [[FMAXNUM_IEEE1]](<2 x s16>) ; GFX9: $vgpr0_vgpr1 = COPY [[CONCAT_VECTORS]](<4 x s16>) %0:_(<4 x s16>) = COPY $vgpr0_vgpr1 %1:_(<4 x s16>) = COPY $vgpr2_vgpr3 - %2:_(<4 x s16>) = G_FMINNUM %0, %1 + %2:_(<4 x s16>) = G_FMAXNUM %0, %1 $vgpr0_vgpr1 = COPY %2 ... From c73df5696696327a15af2f05b30923cd66361ddc Mon Sep 17 00:00:00 2001 From: Matt Arsenault Date: Sat, 18 Jul 2020 10:35:40 -0400 Subject: [PATCH 715/771] AMDGPU/GlobalISel: Address some test fixmes that don't fail now --- .../GlobalISel/constant-bus-restriction.ll | 505 ++++++------------ .../GlobalISel/llvm.amdgcn.ds.ordered.add.ll | 3 +- .../GlobalISel/llvm.amdgcn.ds.ordered.swap.ll | 3 +- .../AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll | 50 +- llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll | 140 ++++- 5 files changed, 341 insertions(+), 360 deletions(-) diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll index ff0de0d1f6090..c815220ef97d9 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/constant-bus-restriction.ll @@ -1,294 +1,182 @@ -; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py -; RUN: llc -global-isel 
-mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=regbankselect -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s -; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -stop-after=regbankselect -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s +; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX10 %s ; Make sure we don't violate the constant bus restriction -; FIXME: Make this test isa output when div.fmas works. - define amdgpu_ps float @fmul_s_s(float inreg %src0, float inreg %src1) { - ; GFX9-LABEL: name: fmul_s_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]] - ; GFX9: $vgpr0 = COPY [[FMUL]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fmul_s_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY2]], [[COPY3]] - ; GFX10: $vgpr0 = COPY [[FMUL]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fmul_s_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_mul_f32_e32 v0, s2, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; 
GFX10-LABEL: fmul_s_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mul_f32_e64 v0, s2, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %result = fmul float %src0, %src1 ret float %result } define amdgpu_ps float @fmul_ss(float inreg %src) { - ; GFX9-LABEL: name: fmul_ss - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY1]], [[COPY2]] - ; GFX9: $vgpr0 = COPY [[FMUL]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fmul_ss - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[FMUL:%[0-9]+]]:vgpr(s32) = G_FMUL [[COPY1]], [[COPY2]] - ; GFX10: $vgpr0 = COPY [[FMUL]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fmul_ss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mul_f32_e64 v0, s2, s2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fmul_ss: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mul_f32_e64 v0, s2, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %result = fmul float %src, %src ret float %result } ; Ternary operation with 3 different SGPRs define amdgpu_ps float @fma_s_s_s(float inreg %src0, float inreg %src1, float inreg %src2) { - ; GFX9-LABEL: name: fma_s_s_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $sgpr4 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = 
COPY [[COPY2]](s32) - ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]] - ; GFX9: $vgpr0 = COPY [[FMA]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fma_s_s_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $sgpr4 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr4 - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY3]], [[COPY4]], [[COPY5]] - ; GFX10: $vgpr0 = COPY [[FMA]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fma_s_s_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_mov_b32_e32 v1, s4 +; GFX9-NEXT: v_fma_f32 v0, s2, v0, v1 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fma_s_s_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mov_b32_e32 v0, s4 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_fma_f32 v0, s3, s2, v0 +; GFX10-NEXT: ; return to shader part epilog %result = call float @llvm.fma.f32(float %src0, float %src1, float %src2) ret float %result } ; Ternary operation with 3 identical SGPRs define amdgpu_ps float @fma_sss(float inreg %src) { - ; GFX9-LABEL: name: fma_sss - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY1]], [[COPY2]], [[COPY3]] - ; GFX9: $vgpr0 = COPY [[FMA]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fma_sss - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 
- ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY1]], [[COPY2]], [[COPY3]] - ; GFX10: $vgpr0 = COPY [[FMA]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fma_sss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_fma_f32 v0, s2, s2, s2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fma_sss: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_fma_f32 v0, s2, s2, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %result = call float @llvm.fma.f32(float %src, float %src, float %src) ret float %result } ; src0/1 are same SGPR define amdgpu_ps float @fma_ss_s(float inreg %src01, float inreg %src2) { - ; GFX9-LABEL: name: fma_ss_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]] - ; GFX9: $vgpr0 = COPY [[FMA]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fma_ss_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]] - ; GFX10: $vgpr0 = COPY [[FMA]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fma_ss_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; 
GFX9-NEXT: v_fma_f32 v0, s2, s2, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fma_ss_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_fma_f32 v0, s2, s2, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %result = call float @llvm.fma.f32(float %src01, float %src01, float %src2) ret float %result } ; src1/2 are same SGPR define amdgpu_ps float @fma_s_ss(float inreg %src0, float inreg %src12) { - ; GFX9-LABEL: name: fma_s_ss - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]] - ; GFX9: $vgpr0 = COPY [[FMA]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fma_s_ss - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]] - ; GFX10: $vgpr0 = COPY [[FMA]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fma_s_ss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_fma_f32 v0, s2, v0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fma_s_ss: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_fma_f32 v0, s2, s3, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %result = call float @llvm.fma.f32(float %src0, float %src12, float %src12) ret float %result } ; src0/2 are same SGPR define 
amdgpu_ps float @fma_ss_s_same_outer(float inreg %src02, float inreg %src1) { - ; GFX9-LABEL: name: fma_ss_s_same_outer - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]] - ; GFX9: $vgpr0 = COPY [[FMA]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fma_ss_s_same_outer - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[FMA:%[0-9]+]]:vgpr(s32) = G_FMA [[COPY2]], [[COPY3]], [[COPY4]] - ; GFX10: $vgpr0 = COPY [[FMA]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fma_ss_s_same_outer: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_fma_f32 v0, s2, v0, s2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fma_ss_s_same_outer: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_fma_f32 v0, s2, s3, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %result = call float @llvm.fma.f32(float %src02, float %src1, float %src02) ret float %result } define amdgpu_ps float @fcmp_s_s(float inreg %src0, float inreg %src1) { - ; GFX9-LABEL: name: fcmp_s_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) 
= G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]] - ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]] - ; GFX9: $vgpr0 = COPY [[SELECT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: fcmp_s_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY2]] - ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY3]], [[COPY4]] - ; GFX10: $vgpr0 = COPY [[SELECT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: fcmp_s_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, s2, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: fcmp_s_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_cmp_eq_f32_e64 s0, s2, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0 +; GFX10-NEXT: ; return to shader part epilog %cmp = fcmp oeq float %src0, %src1 %result = select i1 %cmp, float 1.0, float 0.0 ret float %result } define amdgpu_ps float @select_vcc_s_s(float %cmp0, float %cmp1, float inreg %src0, float inreg %src1) { - ; GFX9-LABEL: name: select_vcc_s_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $vgpr0, 
$vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]] - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]] - ; GFX9: $vgpr0 = COPY [[SELECT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: select_vcc_s_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]] - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY2]](s32) - ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]] - ; GFX10: $vgpr0 = COPY [[SELECT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: select_vcc_s_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v2, s2 +; GFX9-NEXT: v_mov_b32_e32 v3, s3 +; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e32 v0, v3, v2, vcc +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: select_vcc_s_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mov_b32_e32 v2, s3 +; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_cndmask_b32_e64 v0, v2, s2, vcc_lo +; GFX10-NEXT: ; return to shader part epilog %cmp = fcmp oeq float %cmp0, %cmp1 %result = select i1 %cmp, float %src0, float %src1 ret float %result } define amdgpu_ps float @select_vcc_fneg_s_s(float %cmp0, 
float %cmp1, float inreg %src0, float inreg %src1) { - ; GFX9-LABEL: name: select_vcc_fneg_s_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1 - ; GFX9: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX9: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]] - ; GFX9: [[FNEG:%[0-9]+]]:sgpr(s32) = G_FNEG [[COPY2]] - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[FNEG]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]] - ; GFX9: $vgpr0 = COPY [[SELECT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: select_vcc_fneg_s_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3, $vgpr0, $vgpr1 - ; GFX10: [[COPY:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr1 - ; GFX10: [[COPY2:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY3:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY]](s32), [[COPY1]] - ; GFX10: [[FNEG:%[0-9]+]]:sgpr(s32) = G_FNEG [[COPY2]] - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[FNEG]](s32) - ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY3]](s32) - ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[FCMP]](s1), [[COPY4]], [[COPY5]] - ; GFX10: $vgpr0 = COPY [[SELECT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: select_vcc_fneg_s_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v2, s3 +; GFX9-NEXT: v_mov_b32_e32 v3, s2 +; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, v0, v1 +; GFX9-NEXT: v_cndmask_b32_e64 v0, v2, -v3, vcc +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: select_vcc_fneg_s_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_mov_b32_e32 v2, s2 +; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, v0, v1 +; 
GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_cndmask_b32_e64 v0, s3, -v2, vcc_lo +; GFX10-NEXT: ; return to shader part epilog %cmp = fcmp oeq float %cmp0, %cmp1 %neg.src0 = fneg float %src0 %result = select i1 %cmp, float %neg.src0, float %src1 @@ -297,122 +185,73 @@ define amdgpu_ps float @select_vcc_fneg_s_s(float %cmp0, float %cmp1, float inre ; Constant bus used by vcc define amdgpu_ps float @amdgcn_div_fmas_sss(float inreg %src, float %cmp.src) { - ; GFX9-LABEL: name: amdgcn_div_fmas_sss - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $vgpr0 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX9: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY1]](s32), [[COPY2]] - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[FCMP]](s1) - ; GFX9: $vgpr0 = COPY [[INT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: amdgcn_div_fmas_sss - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $vgpr0 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:vgpr(s32) = COPY $vgpr0 - ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX10: [[FCMP:%[0-9]+]]:vcc(s1) = G_FCMP floatpred(oeq), [[COPY1]](s32), [[COPY2]] - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[INT:%[0-9]+]]:vgpr(s32) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.fmas), [[COPY3]](s32), 
[[COPY4]](s32), [[COPY5]](s32), [[FCMP]](s1) - ; GFX10: $vgpr0 = COPY [[INT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: amdgcn_div_fmas_sss: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_cmp_eq_f32_e32 vcc, 0, v0 +; GFX9-NEXT: v_mov_b32_e32 v0, s2 +; GFX9-NEXT: s_nop 2 +; GFX9-NEXT: v_div_fmas_f32 v0, v0, v0, v0 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: amdgcn_div_fmas_sss: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_cmp_eq_f32_e32 vcc_lo, 0, v0 +; GFX10-NEXT: v_div_fmas_f32 v0, s2, s2, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %vcc = fcmp oeq float %cmp.src, 0.0 %result = call float @llvm.amdgcn.div.fmas.f32(float %src, float %src, float %src, i1 %vcc) ret float %result } define amdgpu_ps float @class_s_s(float inreg %src0, i32 inreg %src1) { - ; GFX9-LABEL: name: class_s_s - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32) - ; GFX9: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX9: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX9: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; GFX9: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX9: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]] - ; GFX9: $vgpr0 = COPY [[SELECT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: class_s_s - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - 
; GFX10: [[INT:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.class), [[COPY2]](s32), [[COPY3]](s32) - ; GFX10: [[C:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 0.000000e+00 - ; GFX10: [[C1:%[0-9]+]]:sgpr(s32) = G_FCONSTANT float 1.000000e+00 - ; GFX10: [[COPY4:%[0-9]+]]:vgpr(s32) = COPY [[C1]](s32) - ; GFX10: [[COPY5:%[0-9]+]]:vgpr(s32) = COPY [[C]](s32) - ; GFX10: [[SELECT:%[0-9]+]]:vgpr(s32) = G_SELECT [[INT]](s1), [[COPY4]], [[COPY5]] - ; GFX10: $vgpr0 = COPY [[SELECT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: class_s_s: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_cmp_class_f32_e32 vcc, s2, v0 +; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, vcc +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: class_s_s: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_cmp_class_f32_e64 s0, s2, s3 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: v_cndmask_b32_e64 v0, 0, 1.0, s0 +; GFX10-NEXT: ; return to shader part epilog %class = call i1 @llvm.amdgcn.class.f32(float %src0, i32 %src1) %result = select i1 %class, float 1.0, float 0.0 ret float %result } define amdgpu_ps float @div_scale_s_s_true(float inreg %src0, float inreg %src1) { - ; GFX9-LABEL: name: div_scale_s_s_true - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), -1 - ; GFX9: $vgpr0 = COPY [[INT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: div_scale_s_s_true - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: 
[[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), -1 - ; GFX10: $vgpr0 = COPY [[INT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: div_scale_s_s_true: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_div_scale_f32 v0, s[0:1], s2, v0, s2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: div_scale_s_s_true: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_div_scale_f32 v0, s0, s2, s3, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 true) %result = extractvalue { float, i1 } %div.scale, 0 ret float %result } define amdgpu_ps float @div_scale_s_s_false(float inreg %src0, float inreg %src1) { - ; GFX9-LABEL: name: div_scale_s_s_false - ; GFX9: bb.1 (%ir-block.0): - ; GFX9: liveins: $sgpr2, $sgpr3 - ; GFX9: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX9: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX9: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX9: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX9: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), [[COPY3]](s32), 0 - ; GFX9: $vgpr0 = COPY [[INT]](s32) - ; GFX9: SI_RETURN_TO_EPILOG implicit $vgpr0 - ; GFX10-LABEL: name: div_scale_s_s_false - ; GFX10: bb.1 (%ir-block.0): - ; GFX10: liveins: $sgpr2, $sgpr3 - ; GFX10: [[COPY:%[0-9]+]]:sgpr(s32) = COPY $sgpr2 - ; GFX10: [[COPY1:%[0-9]+]]:sgpr(s32) = COPY $sgpr3 - ; GFX10: [[COPY2:%[0-9]+]]:vgpr(s32) = COPY [[COPY]](s32) - ; GFX10: [[COPY3:%[0-9]+]]:vgpr(s32) = COPY [[COPY1]](s32) - ; GFX10: [[INT:%[0-9]+]]:vgpr(s32), [[INT1:%[0-9]+]]:vcc(s1) = G_INTRINSIC intrinsic(@llvm.amdgcn.div.scale), [[COPY2]](s32), 
[[COPY3]](s32), 0 - ; GFX10: $vgpr0 = COPY [[INT]](s32) - ; GFX10: SI_RETURN_TO_EPILOG implicit $vgpr0 +; GFX9-LABEL: div_scale_s_s_false: +; GFX9: ; %bb.0: +; GFX9-NEXT: v_mov_b32_e32 v0, s3 +; GFX9-NEXT: v_div_scale_f32 v0, s[0:1], v0, v0, s2 +; GFX9-NEXT: ; return to shader part epilog +; +; GFX10-LABEL: div_scale_s_s_false: +; GFX10: ; %bb.0: +; GFX10-NEXT: v_div_scale_f32 v0, s0, s3, s3, s2 +; GFX10-NEXT: ; implicit-def: $vcc_hi +; GFX10-NEXT: ; return to shader part epilog %div.scale = call { float, i1 } @llvm.amdgcn.div.scale.f32(float %src0, float %src1, i1 false) %result = extractvalue { float, i1 } %div.scale, 0 ret float %result diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll index 8cba08f016daf..4193d976afd65 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.add.ll @@ -1,5 +1,4 @@ -; FIXME: Broken SI run line -; XUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll +; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll ; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.add.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.add.ll diff --git 
a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll index 28c2c7a4e9bfb..e2c3b625395a7 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.ordered.swap.ll @@ -1,5 +1,4 @@ -; FIXME: Broken SI run line -; XUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll +; RUN: llc -global-isel -march=amdgcn -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll ; RUN: llc -global-isel -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll ; RUN: llc -global-isel -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %S/../llvm.amdgcn.ds.ordered.swap.ll | FileCheck -check-prefixes=GCN,VIGFX9,FUNC %S/../llvm.amdgcn.ds.ordered.swap.ll diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll index 5389adf5a526e..7d116f8e8925f 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.sbfe.ll @@ -687,14 +687,48 @@ define amdgpu_kernel void @bfe_sext_in_reg_i24(i32 addrspace(1)* %out, i32 addrs ret void } -; FIXME -; define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { -; %src = load i32, i32 addrspace(1)* %in, align 4 -; %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16) -; %div = sdiv i32 %bfe, 2 -; store i32 
%div, i32 addrspace(1)* %out, align 4 -; ret void -; } +define amdgpu_kernel void @simplify_demanded_bfe_sdiv(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { +; GFX6-LABEL: simplify_demanded_bfe_sdiv: +; GFX6: ; %bb.0: +; GFX6-NEXT: v_cvt_f32_ubyte0_e32 v0, 2 +; GFX6-NEXT: v_rcp_iflag_f32_e32 v0, v0 +; GFX6-NEXT: s_load_dwordx2 s[2:3], s[0:1], 0xb +; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9 +; GFX6-NEXT: s_mov_b32 s6, -1 +; GFX6-NEXT: v_mul_f32_e32 v0, 0x4f7ffffe, v0 +; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_load_dword s2, s[2:3], 0x0 +; GFX6-NEXT: s_mov_b32 s7, 0xf000 +; GFX6-NEXT: v_mul_lo_u32 v1, -2, v0 +; GFX6-NEXT: s_waitcnt lgkmcnt(0) +; GFX6-NEXT: s_bfe_i32 s2, s2, 0x100001 +; GFX6-NEXT: s_ashr_i32 s3, s2, 31 +; GFX6-NEXT: v_mul_hi_u32 v1, v0, v1 +; GFX6-NEXT: s_add_i32 s2, s2, s3 +; GFX6-NEXT: s_xor_b32 s2, s2, s3 +; GFX6-NEXT: v_add_i32_e32 v0, vcc, v0, v1 +; GFX6-NEXT: v_mul_hi_u32 v0, s2, v0 +; GFX6-NEXT: v_lshlrev_b32_e32 v1, 1, v0 +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_sub_i32_e32 v1, vcc, s2, v1 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_subrev_i32_e64 v2, s[0:1], 2, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc +; GFX6-NEXT: v_add_i32_e32 v2, vcc, 1, v0 +; GFX6-NEXT: v_cmp_le_u32_e32 vcc, 2, v1 +; GFX6-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX6-NEXT: v_xor_b32_e32 v0, s3, v0 +; GFX6-NEXT: v_subrev_i32_e32 v0, vcc, s3, v0 +; GFX6-NEXT: buffer_store_dword v0, off, s[4:7], 0 +; GFX6-NEXT: s_endpgm + %src = load i32, i32 addrspace(1)* %in, align 4 + %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 1, i32 16) + %div = sdiv i32 %bfe, 2 + store i32 %div, i32 addrspace(1)* %out, align 4 + ret void +} define amdgpu_kernel void @bfe_0_width(i32 addrspace(1)* %out, i32 addrspace(1)* %ptr) #0 { ; GFX6-LABEL: bfe_0_width: diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll 
b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll index a8631a18de3cd..2512aaaeb082c 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/xnor.ll @@ -15,14 +15,58 @@ entry: ret i32 %r0.val } -; FIXME: -; define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) { -; entry: -; %xor = xor <2 x i16> %a, %b -; %r0.val = xor <2 x i16> %xor, -; %cast = bitcast <2 x i16> %r0.val to i32 -; ret i32 %cast -; } +; FIXME: fails to match +define amdgpu_ps i32 @scalar_xnor_v2i16_one_use(<2 x i16> inreg %a, <2 x i16> inreg %b) { +; GFX7-LABEL: scalar_xnor_v2i16_one_use: +; GFX7: ; %bb.0: ; %entry +; GFX7-NEXT: s_mov_b32 s4, 0xffff +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_and_b32 s0, s0, s4 +; GFX7-NEXT: s_or_b32 s0, s1, s0 +; GFX7-NEXT: s_lshl_b32 s1, s3, 16 +; GFX7-NEXT: s_and_b32 s2, s2, s4 +; GFX7-NEXT: s_or_b32 s1, s1, s2 +; GFX7-NEXT: s_xor_b32 s0, s0, s1 +; GFX7-NEXT: s_xor_b32 s0, s0, -1 +; GFX7-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: scalar_xnor_v2i16_one_use: +; GFX8: ; %bb.0: ; %entry +; GFX8-NEXT: s_mov_b32 s2, 0xffff +; GFX8-NEXT: s_lshr_b32 s5, s0, 16 +; GFX8-NEXT: s_lshr_b32 s6, s1, 16 +; GFX8-NEXT: s_and_b32 s4, s0, s2 +; GFX8-NEXT: s_and_b32 s0, s1, s2 +; GFX8-NEXT: s_and_b32 s5, s5, s2 +; GFX8-NEXT: s_and_b32 s1, s6, s2 +; GFX8-NEXT: s_mov_b32 s3, s2 +; GFX8-NEXT: s_xor_b64 s[0:1], s[4:5], s[0:1] +; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[2:3] +; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX8-NEXT: s_lshl_b32 s1, s1, 16 +; GFX8-NEXT: s_and_b32 s0, s0, s2 +; GFX8-NEXT: s_or_b32 s0, s1, s0 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX900-LABEL: scalar_xnor_v2i16_one_use: +; GFX900: ; %bb.0: ; %entry +; GFX900-NEXT: s_pack_ll_b32_b16 s2, -1, -1 +; GFX900-NEXT: s_xor_b32 s0, s0, s1 +; GFX900-NEXT: s_xor_b32 s0, s0, s2 +; GFX900-NEXT: ; return to shader part epilog +; +; GFX906-LABEL: scalar_xnor_v2i16_one_use: +; GFX906: ; %bb.0: ; %entry +; GFX906-NEXT: 
s_pack_ll_b32_b16 s2, -1, -1 +; GFX906-NEXT: s_xor_b32 s0, s0, s1 +; GFX906-NEXT: s_xor_b32 s0, s0, s2 +; GFX906-NEXT: ; return to shader part epilog +entry: + %xor = xor <2 x i16> %a, %b + %r0.val = xor <2 x i16> %xor, + %cast = bitcast <2 x i16> %r0.val to i32 + ret i32 %cast +} define amdgpu_ps <2 x i32> @scalar_xnor_i32_mul_use(i32 inreg %a, i32 inreg %b) { ; GCN-LABEL: scalar_xnor_i32_mul_use: @@ -51,13 +95,79 @@ define amdgpu_ps i64 @scalar_xnor_i64_one_use(i64 inreg %a, i64 inreg %b) { ret i64 %r0.val } -; FIXME: -; define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) { -; %xor = xor <4 x i16> %a, %b -; %ret = xor <4 x i16> %xor, -; %cast = bitcast <4 x i16> %ret to i64 -; ret i64 %cast -; } +; FIXME: fails to match +define amdgpu_ps i64 @scalar_xnor_v4i16_one_use(<4 x i16> inreg %a, <4 x i16> inreg %b) { +; GFX7-LABEL: scalar_xnor_v4i16_one_use: +; GFX7: ; %bb.0: +; GFX7-NEXT: s_mov_b32 s8, 0xffff +; GFX7-NEXT: s_lshl_b32 s1, s1, 16 +; GFX7-NEXT: s_and_b32 s0, s0, s8 +; GFX7-NEXT: s_or_b32 s0, s1, s0 +; GFX7-NEXT: s_lshl_b32 s1, s3, 16 +; GFX7-NEXT: s_and_b32 s2, s2, s8 +; GFX7-NEXT: s_or_b32 s1, s1, s2 +; GFX7-NEXT: s_and_b32 s3, s4, s8 +; GFX7-NEXT: s_lshl_b32 s2, s5, 16 +; GFX7-NEXT: s_or_b32 s2, s2, s3 +; GFX7-NEXT: s_lshl_b32 s3, s7, 16 +; GFX7-NEXT: s_and_b32 s4, s6, s8 +; GFX7-NEXT: s_or_b32 s3, s3, s4 +; GFX7-NEXT: s_mov_b32 s4, -1 +; GFX7-NEXT: s_mov_b32 s5, s4 +; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX7-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] +; GFX7-NEXT: ; return to shader part epilog +; +; GFX8-LABEL: scalar_xnor_v4i16_one_use: +; GFX8: ; %bb.0: +; GFX8-NEXT: s_mov_b32 s4, 0xffff +; GFX8-NEXT: s_lshr_b32 s5, s0, 16 +; GFX8-NEXT: s_and_b32 s7, s5, s4 +; GFX8-NEXT: s_lshr_b32 s5, s1, 16 +; GFX8-NEXT: s_and_b32 s6, s0, s4 +; GFX8-NEXT: s_and_b32 s0, s1, s4 +; GFX8-NEXT: s_and_b32 s1, s5, s4 +; GFX8-NEXT: s_lshr_b32 s5, s2, 16 +; GFX8-NEXT: s_and_b32 s8, s2, s4 +; GFX8-NEXT: s_and_b32 s9, s5, s4 +; 
GFX8-NEXT: s_lshr_b32 s5, s3, 16 +; GFX8-NEXT: s_and_b32 s2, s3, s4 +; GFX8-NEXT: s_and_b32 s3, s5, s4 +; GFX8-NEXT: s_xor_b64 s[6:7], s[6:7], s[8:9] +; GFX8-NEXT: s_mov_b32 s5, s4 +; GFX8-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX8-NEXT: s_and_b64 s[2:3], s[6:7], s[4:5] +; GFX8-NEXT: s_and_b64 s[0:1], s[0:1], s[4:5] +; GFX8-NEXT: s_xor_b64 s[2:3], s[2:3], s[4:5] +; GFX8-NEXT: s_xor_b64 s[6:7], s[0:1], s[4:5] +; GFX8-NEXT: s_and_b32 s1, s2, s4 +; GFX8-NEXT: s_lshl_b32 s0, s3, 16 +; GFX8-NEXT: s_or_b32 s0, s0, s1 +; GFX8-NEXT: s_lshl_b32 s1, s7, 16 +; GFX8-NEXT: s_and_b32 s2, s6, s4 +; GFX8-NEXT: s_or_b32 s1, s1, s2 +; GFX8-NEXT: ; return to shader part epilog +; +; GFX900-LABEL: scalar_xnor_v4i16_one_use: +; GFX900: ; %bb.0: +; GFX900-NEXT: s_pack_ll_b32_b16 s4, -1, -1 +; GFX900-NEXT: s_mov_b32 s5, s4 +; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX900-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] +; GFX900-NEXT: ; return to shader part epilog +; +; GFX906-LABEL: scalar_xnor_v4i16_one_use: +; GFX906: ; %bb.0: +; GFX906-NEXT: s_pack_ll_b32_b16 s4, -1, -1 +; GFX906-NEXT: s_mov_b32 s5, s4 +; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[2:3] +; GFX906-NEXT: s_xor_b64 s[0:1], s[0:1], s[4:5] +; GFX906-NEXT: ; return to shader part epilog + %xor = xor <4 x i16> %a, %b + %ret = xor <4 x i16> %xor, + %cast = bitcast <4 x i16> %ret to i64 + ret i64 %cast +} define amdgpu_ps <2 x i64> @scalar_xnor_i64_mul_use(i64 inreg %a, i64 inreg %b) { ; GCN-LABEL: scalar_xnor_i64_mul_use: From 7b16fd8a2514287765cdcdb09b9059d5d9a2933a Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 18 Jul 2020 17:30:42 +0300 Subject: [PATCH 716/771] [NFC][CVP] Add tests for possible sdiv->udiv where operands are not non-negative Currently that fold requires both operands to be non-negative, but the only real requirement for the fold is that we must know the domains of the operands. 
--- .../CorrelatedValuePropagation/sdiv.ll | 173 +++++++++++++++++- 1 file changed, 163 insertions(+), 10 deletions(-) diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll b/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll index b037bfaee7a21..ec5de0010a14f 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll @@ -1,7 +1,22 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt < %s -correlated-propagation -S | FileCheck %s -; CHECK-LABEL: @test0( +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" + define void @test0(i32 %n) { +; CHECK-LABEL: @test0( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[J_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[DIV1:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[J_0]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[DIV1]] = udiv i32 [[J_0]], 2 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; CHECK-NEXT: ret void +; entry: br label %for.cond @@ -11,7 +26,6 @@ for.cond: ; preds = %for.body, %entry br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond -; CHECK: %div1 = udiv i32 %j.0, 2 %div = sdiv i32 %j.0, 2 br label %for.cond @@ -19,8 +33,20 @@ for.end: ; preds = %for.cond ret void } -; CHECK-LABEL: @test1( define void @test1(i32 %n) { +; CHECK-LABEL: @test1( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[J_0:%.*]] = phi i32 [ [[N:%.*]], [[ENTRY:%.*]] ], [ [[DIV:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[J_0]], -2 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[DIV]] = sdiv i32 [[J_0]], 2 +; CHECK-NEXT: br label [[FOR_COND]] +; CHECK: for.end: +; 
CHECK-NEXT: ret void +; entry: br label %for.cond @@ -30,7 +56,6 @@ for.cond: ; preds = %for.body, %entry br i1 %cmp, label %for.body, label %for.end for.body: ; preds = %for.cond -; CHECK: %div = sdiv i32 %j.0, 2 %div = sdiv i32 %j.0, 2 br label %for.cond @@ -38,14 +63,22 @@ for.end: ; preds = %for.cond ret void } -; CHECK-LABEL: @test2( define void @test2(i32 %n) { +; CHECK-LABEL: @test2( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], 1 +; CHECK-NEXT: br i1 [[CMP]], label [[BB:%.*]], label [[EXIT:%.*]] +; CHECK: bb: +; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[N]], 2 +; CHECK-NEXT: br label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: %cmp = icmp sgt i32 %n, 1 br i1 %cmp, label %bb, label %exit bb: -; CHECK: %div1 = udiv i32 %n, 2 %div = sdiv i32 %n, 2 br label %exit @@ -57,14 +90,25 @@ exit: ; at the point of sdiv, we know that %a is always greater than 0, ; because of the guard before it, so we can transform it to udiv. declare void @llvm.experimental.guard(i1,...) -; CHECK-LABEL: @test4 define void @test4(i32 %n) { +; CHECK-LABEL: @test4( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[EXIT:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[DIV1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[A]], 4 +; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND]]) [ "deopt"() ] +; CHECK-NEXT: [[DIV1]] = udiv i32 [[A]], 6 +; CHECK-NEXT: br i1 [[COND]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: %cmp = icmp sgt i32 %n, 0 br i1 %cmp, label %loop, label %exit loop: -; CHECK: udiv i32 %a, 6 %a = phi i32 [ %n, %entry ], [ %div, %loop ] %cond = icmp sgt i32 %a, 4 call void(i1,...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ] @@ -77,14 +121,26 @@ exit: ; same test as above with assume instead of guard. 
declare void @llvm.assume(i1) -; CHECK-LABEL: @test5 define void @test5(i32 %n) { +; CHECK-LABEL: @test5( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[LOOP:%.*]], label [[EXIT:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[A:%.*]] = phi i32 [ [[N]], [[ENTRY:%.*]] ], [ [[DIV1:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[COND:%.*]] = icmp sgt i32 [[A]], 4 +; CHECK-NEXT: call void @llvm.assume(i1 [[COND]]) +; CHECK-NEXT: [[DIV1]] = udiv i32 [[A]], 6 +; CHECK-NEXT: [[LOOPCOND:%.*]] = icmp sgt i32 [[DIV1]], 8 +; CHECK-NEXT: br i1 [[LOOPCOND]], label [[LOOP]], label [[EXIT]] +; CHECK: exit: +; CHECK-NEXT: ret void +; entry: %cmp = icmp sgt i32 %n, 0 br i1 %cmp, label %loop, label %exit loop: -; CHECK: udiv i32 %a, 6 %a = phi i32 [ %n, %entry ], [ %div, %loop ] %cond = icmp sgt i32 %a, 4 call void @llvm.assume(i1 %cond) @@ -95,3 +151,100 @@ loop: exit: ret void } + +; Now, let's try various domain combinations for operands. + +define i32 @test6_pos_pos(i32 %x, i32 %y) { +; CHECK-LABEL: @test6_pos_pos( +; CHECK-NEXT: [[C0:%.*]] = icmp sge i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) +; CHECK-NEXT: [[C1:%.*]] = icmp sge i32 [[Y:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) +; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[DIV1]] +; + %c0 = icmp sge i32 %x, 0 + call void @llvm.assume(i1 %c0) + %c1 = icmp sge i32 %y, 0 + call void @llvm.assume(i1 %c1) + + %div = sdiv i32 %x, %y + ret i32 %div +} +define i32 @test7_pos_neg(i32 %x, i32 %y) { +; CHECK-LABEL: @test7_pos_neg( +; CHECK-NEXT: [[C0:%.*]] = icmp sge i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) +; CHECK-NEXT: [[C1:%.*]] = icmp sle i32 [[Y:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) +; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[DIV]] +; + %c0 = icmp sge i32 %x, 0 + call void @llvm.assume(i1 %c0) + %c1 = icmp sle i32 %y, 0 + call void 
@llvm.assume(i1 %c1) + + %div = sdiv i32 %x, %y + ret i32 %div +} +define i32 @test8_neg_pos(i32 %x, i32 %y) { +; CHECK-LABEL: @test8_neg_pos( +; CHECK-NEXT: [[C0:%.*]] = icmp sle i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) +; CHECK-NEXT: [[C1:%.*]] = icmp sge i32 [[Y:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) +; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[DIV]] +; + %c0 = icmp sle i32 %x, 0 + call void @llvm.assume(i1 %c0) + %c1 = icmp sge i32 %y, 0 + call void @llvm.assume(i1 %c1) + + %div = sdiv i32 %x, %y + ret i32 %div +} +define i32 @test9_neg_neg(i32 %x, i32 %y) { +; CHECK-LABEL: @test9_neg_neg( +; CHECK-NEXT: [[C0:%.*]] = icmp sle i32 [[X:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) +; CHECK-NEXT: [[C1:%.*]] = icmp sle i32 [[Y:%.*]], 0 +; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) +; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]] +; CHECK-NEXT: ret i32 [[DIV]] +; + %c0 = icmp sle i32 %x, 0 + call void @llvm.assume(i1 %c0) + %c1 = icmp sle i32 %y, 0 + call void @llvm.assume(i1 %c1) + + %div = sdiv i32 %x, %y + ret i32 %div +} + +; After making division unsigned, can we narrow it? 
+define i32 @test10_narrow(i32 %x, i32 %y) { +; CHECK-LABEL: @test10_narrow( +; CHECK-NEXT: [[C0:%.*]] = icmp ult i32 [[X:%.*]], 128 +; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) +; CHECK-NEXT: [[C1:%.*]] = icmp ult i32 [[Y:%.*]], 128 +; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) +; CHECK-NEXT: br label [[END:%.*]] +; CHECK: end: +; CHECK-NEXT: [[DIV1_LHS_TRUNC:%.*]] = trunc i32 [[X]] to i8 +; CHECK-NEXT: [[DIV1_RHS_TRUNC:%.*]] = trunc i32 [[Y]] to i8 +; CHECK-NEXT: [[DIV12:%.*]] = udiv i8 [[DIV1_LHS_TRUNC]], [[DIV1_RHS_TRUNC]] +; CHECK-NEXT: [[DIV1_ZEXT:%.*]] = zext i8 [[DIV12]] to i32 +; CHECK-NEXT: ret i32 [[DIV1_ZEXT]] +; + %c0 = icmp ult i32 %x, 128 + call void @llvm.assume(i1 %c0) + %c1 = icmp ult i32 %y, 128 + call void @llvm.assume(i1 %c1) + br label %end + +end: + %div = sdiv i32 %x, %y + ret i32 %div +} From 2cde6984d8fbaf9d3ce9e09ce632f7de553df5bb Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 18 Jul 2020 13:07:13 +0300 Subject: [PATCH 717/771] [NFC][CVP] Refactor isPositive() out of hasPositiveOperands() --- .../Scalar/CorrelatedValuePropagation.cpp | 23 ++++++++----------- 1 file changed, 10 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index cd2f4ca36f3bb..6a18bc2d32409 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -601,14 +601,15 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { return true; } +static bool isPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { + Constant *Zero = ConstantInt::get(V->getType(), 0); + auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SGE, V, Zero, CxtI); + return Result == LazyValueInfo::True; +} + static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) { - Constant *Zero = ConstantInt::get(SDI->getType(), 0); - for (Value *O : SDI->operands()) { - auto Result = 
LVI->getPredicateAt(ICmpInst::ICMP_SGE, O, Zero, SDI); - if (Result != LazyValueInfo::True) - return false; - } - return true; + return all_of(SDI->operands(), + [&](Value *Op) { return isPositive(Op, LVI, SDI); }); } /// Try to shrink a udiv/urem's width down to the smallest power of two that's @@ -697,9 +698,7 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { if (SDI->getType()->isVectorTy()) return false; - Constant *Zero = ConstantInt::get(SDI->getType(), 0); - if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, SDI->getOperand(0), Zero, SDI) != - LazyValueInfo::True) + if (!isPositive(SDI->getOperand(0), LVI, SDI)) return false; ++NumAShrs; @@ -719,9 +718,7 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { Value *Base = SDI->getOperand(0); - Constant *Zero = ConstantInt::get(Base->getType(), 0); - if (LVI->getPredicateAt(ICmpInst::ICMP_SGE, Base, Zero, SDI) != - LazyValueInfo::True) + if (!isPositive(Base, LVI, SDI)) return false; ++NumSExt; From 45b738882474e615ccf15e289b765bca0ccfc1d2 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 18 Jul 2020 13:13:01 +0300 Subject: [PATCH 718/771] [NFC][CVP] Rename predicates - s/positive/non negative/ to better note that zero is ok --- .../Scalar/CorrelatedValuePropagation.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 6a18bc2d32409..284312eaf8220 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -601,15 +601,15 @@ static bool processCallSite(CallBase &CB, LazyValueInfo *LVI) { return true; } -static bool isPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { +static bool isNonNegative(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { Constant *Zero = ConstantInt::get(V->getType(), 0); auto Result = 
LVI->getPredicateAt(ICmpInst::ICMP_SGE, V, Zero, CxtI); return Result == LazyValueInfo::True; } -static bool hasPositiveOperands(BinaryOperator *SDI, LazyValueInfo *LVI) { +static bool allOperandsAreNonNegative(BinaryOperator *SDI, LazyValueInfo *LVI) { return all_of(SDI->operands(), - [&](Value *Op) { return isPositive(Op, LVI, SDI); }); + [&](Value *Op) { return isNonNegative(Op, LVI, SDI); }); } /// Try to shrink a udiv/urem's width down to the smallest power of two that's @@ -655,7 +655,7 @@ static bool processUDivOrURem(BinaryOperator *Instr, LazyValueInfo *LVI) { } static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) { - if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI)) + if (SDI->getType()->isVectorTy() || !allOperandsAreNonNegative(SDI, LVI)) return false; ++NumSRems; @@ -677,7 +677,7 @@ static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) { /// conditions, this can sometimes prove conditions instcombine can't by /// exploiting range information. static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) { - if (SDI->getType()->isVectorTy() || !hasPositiveOperands(SDI, LVI)) + if (SDI->getType()->isVectorTy() || !allOperandsAreNonNegative(SDI, LVI)) return false; ++NumSDivs; @@ -698,7 +698,7 @@ static bool processAShr(BinaryOperator *SDI, LazyValueInfo *LVI) { if (SDI->getType()->isVectorTy()) return false; - if (!isPositive(SDI->getOperand(0), LVI, SDI)) + if (!isNonNegative(SDI->getOperand(0), LVI, SDI)) return false; ++NumAShrs; @@ -718,7 +718,7 @@ static bool processSExt(SExtInst *SDI, LazyValueInfo *LVI) { Value *Base = SDI->getOperand(0); - if (!isPositive(Base, LVI, SDI)) + if (!isNonNegative(Base, LVI, SDI)) return false; ++NumSExt; From 8d487668d09fb0e4e54f36207f07c1480ffabbfd Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 18 Jul 2020 13:23:26 +0300 Subject: [PATCH 719/771] [CVP] Soften SDiv into a UDiv as long as we know domains of both of the operands. 
Yes, if operands are non-positive this comes at the extra cost of two extra negations. But a. division is already just ridiculously costly, two more subtractions can't hurt much :) and b. we have better/more analyses/folds for an unsigned division, we could end up narrowing its bitwidth, converting it to lshr, etc. This is essentially a take two on 0fdcca07ad2c0bdc2cdd40ba638109926f4f513b, which didn't fix the potential regression I was seeing, because ValueTracking's computeKnownBits() doesn't make use of dominating conditions in its analysis. While I could teach it that, this seems like the more general fix. This big hammer actually does catch said potential regression. Over vanilla test-suite + RawSpeed + darktable (10M IR instrs, 1M IR BB, 1M X86 ASM instrs), this fires/converts 5 more (+2%) SDiv's, the total instruction count at the end of middle-end pipeline is only +6, so out of +10 extra negations, ~half are folded away, and asm instr count is only +1, so practically speaking all extra negations are folded away and are therefore free. Sadly, all these new UDiv's remained, none folded away. But there are two fewer basic blocks.
https://rise4fun.com/Alive/VS6 Name: v0 Pre: C0 >= 0 && C1 >= 0 %r = sdiv i8 C0, C1 => %r = udiv i8 C0, C1 Name: v1 Pre: C0 <= 0 && C1 >= 0 %r = sdiv i8 C0, C1 => %t0 = udiv i8 -C0, C1 %r = sub i8 0, %t0 Name: v2 Pre: C0 >= 0 && C1 <= 0 %r = sdiv i8 C0, C1 => %t0 = udiv i8 C0, -C1 %r = sub i8 0, %t0 Name: v3 Pre: C0 <= 0 && C1 <= 0 %r = sdiv i8 C0, C1 => %r = udiv i8 -C0, -C1 --- .../Scalar/CorrelatedValuePropagation.cpp | 65 ++++++++++++++++--- .../CorrelatedValuePropagation/sdiv.ll | 18 +++-- 2 files changed, 68 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 284312eaf8220..fb7a005708e56 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -607,6 +607,12 @@ static bool isNonNegative(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { return Result == LazyValueInfo::True; } +static bool isNonPositive(Value *V, LazyValueInfo *LVI, Instruction *CxtI) { + Constant *Zero = ConstantInt::get(V->getType(), 0); + auto Result = LVI->getPredicateAt(ICmpInst::ICMP_SLE, V, Zero, CxtI); + return Result == LazyValueInfo::True; +} + static bool allOperandsAreNonNegative(BinaryOperator *SDI, LazyValueInfo *LVI) { return all_of(SDI->operands(), [&](Value *Op) { return isNonNegative(Op, LVI, SDI); }); @@ -672,24 +678,65 @@ static bool processSRem(BinaryOperator *SDI, LazyValueInfo *LVI) { } /// See if LazyValueInfo's ability to exploit edge conditions or range -/// information is sufficient to prove the both operands of this SDiv are -/// positive. If this is the case, replace the SDiv with a UDiv. Even for local +/// information is sufficient to prove the signs of both operands of this SDiv. +/// If this is the case, replace the SDiv with a UDiv. Even for local /// conditions, this can sometimes prove conditions instcombine can't by /// exploiting range information. 
static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) { - if (SDI->getType()->isVectorTy() || !allOperandsAreNonNegative(SDI, LVI)) + if (SDI->getType()->isVectorTy()) return false; + enum class Domain { NonNegative, NonPositive, Unknown }; + auto getDomain = [&](Value *V) { + if (isNonNegative(V, LVI, SDI)) + return Domain::NonNegative; + if (isNonPositive(V, LVI, SDI)) + return Domain::NonPositive; + return Domain::Unknown; + }; + + struct Operand { + Value *V; + Domain Domain; + }; + std::array Ops; + for (const auto &I : zip(Ops, SDI->operands())) { + Operand &Op = std::get<0>(I); + Op.V = std::get<1>(I); + Op.Domain = getDomain(Op.V); + if (Op.Domain == Domain::Unknown) + return false; + } + + // We know domains of both of the operands! ++NumSDivs; - auto *BO = BinaryOperator::CreateUDiv(SDI->getOperand(0), SDI->getOperand(1), - SDI->getName(), SDI); - BO->setDebugLoc(SDI->getDebugLoc()); - BO->setIsExact(SDI->isExact()); - SDI->replaceAllUsesWith(BO); + + // We need operands to be non-negative, so negate each one that isn't. + for (Operand &Op : Ops) { + if (Op.Domain == Domain::NonNegative) + continue; + auto *BO = + BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI); + BO->setDebugLoc(SDI->getDebugLoc()); + Op.V = BO; + } + + auto *UDiv = + BinaryOperator::CreateUDiv(Ops[0].V, Ops[1].V, SDI->getName(), SDI); + UDiv->setDebugLoc(SDI->getDebugLoc()); + UDiv->setIsExact(SDI->isExact()); + + Value *Res = UDiv; + + // If the operands had two different domains, we need to negate the result. + if (Ops[0].Domain != Ops[1].Domain) + Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); + + SDI->replaceAllUsesWith(Res); SDI->eraseFromParent(); // Try to simplify our new udiv. 
- processUDivOrURem(BO, LVI); + processUDivOrURem(UDiv, LVI); return true; } diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll b/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll index ec5de0010a14f..8dfa09d477925 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/sdiv.ll @@ -177,8 +177,10 @@ define i32 @test7_pos_neg(i32 %x, i32 %y) { ; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) ; CHECK-NEXT: [[C1:%.*]] = icmp sle i32 [[Y:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) -; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]] -; CHECK-NEXT: ret i32 [[DIV]] +; CHECK-NEXT: [[Y_NONNEG:%.*]] = sub i32 0, [[Y]] +; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[X]], [[Y_NONNEG]] +; CHECK-NEXT: [[DIV1_NEG:%.*]] = sub i32 0, [[DIV1]] +; CHECK-NEXT: ret i32 [[DIV1_NEG]] ; %c0 = icmp sge i32 %x, 0 call void @llvm.assume(i1 %c0) @@ -194,8 +196,10 @@ define i32 @test8_neg_pos(i32 %x, i32 %y) { ; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) ; CHECK-NEXT: [[C1:%.*]] = icmp sge i32 [[Y:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) -; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]] -; CHECK-NEXT: ret i32 [[DIV]] +; CHECK-NEXT: [[X_NONNEG:%.*]] = sub i32 0, [[X]] +; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[X_NONNEG]], [[Y]] +; CHECK-NEXT: [[DIV1_NEG:%.*]] = sub i32 0, [[DIV1]] +; CHECK-NEXT: ret i32 [[DIV1_NEG]] ; %c0 = icmp sle i32 %x, 0 call void @llvm.assume(i1 %c0) @@ -211,8 +215,10 @@ define i32 @test9_neg_neg(i32 %x, i32 %y) { ; CHECK-NEXT: call void @llvm.assume(i1 [[C0]]) ; CHECK-NEXT: [[C1:%.*]] = icmp sle i32 [[Y:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[C1]]) -; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[X]], [[Y]] -; CHECK-NEXT: ret i32 [[DIV]] +; CHECK-NEXT: [[X_NONNEG:%.*]] = sub i32 0, [[X]] +; CHECK-NEXT: [[Y_NONNEG:%.*]] = sub i32 0, [[Y]] +; CHECK-NEXT: [[DIV1:%.*]] = udiv i32 [[X_NONNEG]], [[Y_NONNEG]] +; CHECK-NEXT: ret i32 [[DIV1]] ; %c0 = icmp sle i32 %x, 
0 call void @llvm.assume(i1 %c0) From 4b19cccbb5d8d77750da96cef2daefa6c28b0e37 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Sat, 18 Jul 2020 15:59:51 +0100 Subject: [PATCH 720/771] [PredicateInfo] Fold PredicateWithCondition into PredicateBase (NFC). Each concrete instance of a predicate has a condition (also noted in the original PredicateBase comment) and to me it seems like there is no clear benefit of having both PredicateBase and PredicateWithCondition and they can be folded together. Reviewers: nikic, efriedma Reviewed By: nikic Differential Revision: https://reviews.llvm.org/D84089 --- .../llvm/Transforms/Utils/PredicateInfo.h | 24 +++++++------------ llvm/lib/Transforms/Scalar/NewGVN.cpp | 6 +---- 2 files changed, 10 insertions(+), 20 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h index 657b97c67a8b3..cdac4142555db 100644 --- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h @@ -83,37 +83,31 @@ class PredicateBase : public ilist_node { // predicates, this is different to OriginalOp which refers to the initial // operand. Value *RenamedOp; + // The condition associated with this predicate. 
+ Value *Condition; + PredicateBase(const PredicateBase &) = delete; PredicateBase &operator=(const PredicateBase &) = delete; PredicateBase() = delete; virtual ~PredicateBase() = default; - -protected: - PredicateBase(PredicateType PT, Value *Op) : Type(PT), OriginalOp(Op) {} -}; - -class PredicateWithCondition : public PredicateBase { -public: - Value *Condition; static bool classof(const PredicateBase *PB) { return PB->Type == PT_Assume || PB->Type == PT_Branch || PB->Type == PT_Switch; } protected: - PredicateWithCondition(PredicateType PT, Value *Op, Value *Condition) - : PredicateBase(PT, Op), Condition(Condition) {} + PredicateBase(PredicateType PT, Value *Op, Value *Condition) + : Type(PT), OriginalOp(Op), Condition(Condition) {} }; // Provides predicate information for assumes. Since assumes are always true, // we simply provide the assume instruction, so you can tell your relative // position to it. -class PredicateAssume : public PredicateWithCondition { +class PredicateAssume : public PredicateBase { public: IntrinsicInst *AssumeInst; PredicateAssume(Value *Op, IntrinsicInst *AssumeInst, Value *Condition) - : PredicateWithCondition(PT_Assume, Op, Condition), - AssumeInst(AssumeInst) {} + : PredicateBase(PT_Assume, Op, Condition), AssumeInst(AssumeInst) {} PredicateAssume() = delete; static bool classof(const PredicateBase *PB) { return PB->Type == PT_Assume; @@ -123,7 +117,7 @@ class PredicateAssume : public PredicateWithCondition { // Mixin class for edge predicates. The FROM block is the block where the // predicate originates, and the TO block is the block where the predicate is // valid. 
-class PredicateWithEdge : public PredicateWithCondition { +class PredicateWithEdge : public PredicateBase { public: BasicBlock *From; BasicBlock *To; @@ -135,7 +129,7 @@ class PredicateWithEdge : public PredicateWithCondition { protected: PredicateWithEdge(PredicateType PType, Value *Op, BasicBlock *From, BasicBlock *To, Value *Cond) - : PredicateWithCondition(PType, Op, Cond), From(From), To(To) {} + : PredicateBase(PType, Op, Cond), From(From), To(To) {} }; // Provides predicate information for branches. diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 0ed1773373a71..45d01cc1b5845 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -1539,12 +1539,8 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { LLVM_DEBUG(dbgs() << "Found predicate info from instruction !\n"); - auto *PWC = dyn_cast(PI); - if (!PWC) - return nullptr; - auto *CopyOf = I->getOperand(0); - auto *Cond = PWC->Condition; + auto *Cond = PI->Condition; // If this a copy of the condition, it must be either true or false depending // on the predicate info type and edge. From 3ab0f53ef3c9947288e04ee028818176df8f15b1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 18 Jul 2020 08:59:05 -0700 Subject: [PATCH 721/771] [DebugInfo] Respect relocations when decoding DW_EH_PE_sdata4 & DW_EH_PE_sdata8 and support R_ARM_REL32 The addresses in llvm-dwarfdump --eh-frame output for object files are closer to readelf -wf output now. 
--- llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp | 4 ++-- llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp | 6 +++--- llvm/lib/Object/RelocationResolver.cpp | 14 ++++++++++++-- llvm/test/DebugInfo/AArch64/eh-frame.ll | 3 +-- llvm/test/DebugInfo/PowerPC/eh-frame.ll | 5 ++--- llvm/test/MC/Mips/eh-frame.s | 3 ++- .../llvm-dwarfdump/X86/debug_frame_offset.test | 2 +- .../tools/llvm-objdump/MachO/eh_frame-arm64.test | 2 +- .../tools/llvm-readobj/ELF/AArch64/dwarf-cfi.s | 5 +++-- llvm/test/tools/llvm-readobj/ELF/ARM/dwarf-cfi.s | 5 +++-- 10 files changed, 30 insertions(+), 19 deletions(-) diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp index 886fe1dff9769..fa0ceb4bbc01f 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDataExtractor.cpp @@ -104,10 +104,10 @@ DWARFDataExtractor::getEncodedPointer(uint64_t *Offset, uint8_t Encoding, Result = getSigned(Offset, 2); break; case dwarf::DW_EH_PE_sdata4: - Result = getSigned(Offset, 4); + Result = SignExtend64<32>(getRelocatedValue(4, Offset)); break; case dwarf::DW_EH_PE_sdata8: - Result = getSigned(Offset, 8); + Result = getRelocatedValue(8, Offset); break; default: return None; diff --git a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp index 0a1b75592290c..ba7449baaf7f0 100644 --- a/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp +++ b/llvm/lib/DebugInfo/DWARF/DWARFDebugFrame.cpp @@ -521,9 +521,9 @@ Error DWARFDebugFrame::parse(DWARFDataExtractor Data) { "parsing FDE data at 0x%" PRIx64 " failed due to missing CIE", StartOffset); - if (auto Val = Data.getEncodedPointer( - &Offset, Cie->getFDEPointerEncoding(), - EHFrameAddress ? 
EHFrameAddress + Offset : 0)) { + if (auto Val = + Data.getEncodedPointer(&Offset, Cie->getFDEPointerEncoding(), + EHFrameAddress + Offset)) { InitialLocation = *Val; } if (auto Val = Data.getEncodedPointer( diff --git a/llvm/lib/Object/RelocationResolver.cpp b/llvm/lib/Object/RelocationResolver.cpp index 919799a25c6f2..83170bbc4e6d9 100644 --- a/llvm/lib/Object/RelocationResolver.cpp +++ b/llvm/lib/Object/RelocationResolver.cpp @@ -291,12 +291,22 @@ static uint64_t resolvePPC32(RelocationRef R, uint64_t S, uint64_t A) { } static bool supportsARM(uint64_t Type) { - return Type == ELF::R_ARM_ABS32; + switch (Type) { + case ELF::R_ARM_ABS32: + case ELF::R_ARM_REL32: + return true; + default: + return false; + } } static uint64_t resolveARM(RelocationRef R, uint64_t S, uint64_t A) { - if (R.getType() == ELF::R_ARM_ABS32) + switch (R.getType()) { + case ELF::R_ARM_ABS32: return (S + A) & 0xFFFFFFFF; + case ELF::R_ARM_REL32: + return (S + A - R.getOffset()) & 0xFFFFFFFF; + } llvm_unreachable("Invalid relocation type"); } diff --git a/llvm/test/DebugInfo/AArch64/eh-frame.ll b/llvm/test/DebugInfo/AArch64/eh-frame.ll index 9651159271e55..1becd769d52fc 100644 --- a/llvm/test/DebugInfo/AArch64/eh-frame.ll +++ b/llvm/test/DebugInfo/AArch64/eh-frame.ll @@ -7,8 +7,7 @@ ; CHECK-NOT: warning: ; CHECK: FDE cie=00000000 pc=00000000...00000004 -;; TODO Take relocation into consideration -; CHECK: FDE cie=00000000 pc=00000000...00000004 +; CHECK: FDE cie=00000000 pc=00000004...00000008 define void @foo() { entry: diff --git a/llvm/test/DebugInfo/PowerPC/eh-frame.ll b/llvm/test/DebugInfo/PowerPC/eh-frame.ll index 3a8f7df6b61a9..36b1c272f94cc 100644 --- a/llvm/test/DebugInfo/PowerPC/eh-frame.ll +++ b/llvm/test/DebugInfo/PowerPC/eh-frame.ll @@ -7,8 +7,7 @@ ; PPC-NOT: warning: ; PPC: FDE cie=00000000 pc=00000000...00000004 -;; TODO Take relocation into consideration -; PPC: FDE cie=00000000 pc=00000000...00000004 +; PPC: FDE cie=00000000 pc=00000004...00000008 ; RUN: llc -filetype=obj 
-mtriple=ppc64 %s -o %t64.o ; RUN: llvm-readobj -r %t64.o | FileCheck %s --check-prefix=PPC64_REL @@ -19,7 +18,7 @@ ; PPC64-NOT: warning: ; PPC64: FDE cie=00000000 pc=00000000...00000010 -; PPC64: FDE cie=00000000 pc=00000000...00000010 +; PPC64: FDE cie=00000000 pc=00000010...00000020 ; RUN: llc -filetype=obj -mtriple=ppc64le -code-model=large %s -o %t64l.o ; RUN: llvm-readobj -r %t64l.o | FileCheck %s --check-prefix=PPC64L_REL diff --git a/llvm/test/MC/Mips/eh-frame.s b/llvm/test/MC/Mips/eh-frame.s index 5be0d709a896c..024b9e6ac4889 100644 --- a/llvm/test/MC/Mips/eh-frame.s +++ b/llvm/test/MC/Mips/eh-frame.s @@ -68,7 +68,8 @@ func: // DWARF32-EMPTY: // DWARF32-NEXT: DW_CFA_def_cfa_register: reg29 // -// DWARF32: 00000014 00000010 00000018 FDE cie=00000000 pc=00000000...00000000 +// DWARF32_ABS: 00000014 00000010 00000018 FDE cie=00000000 pc=00000000...00000000 +// DWARF32_PIC: 00000014 00000010 00000018 FDE cie=00000000 pc=0000001c...0000001c // DWARF32-NEXT: Format: DWARF32 // DWARF32-NEXT: DW_CFA_nop: // DWARF32-NEXT: DW_CFA_nop: diff --git a/llvm/test/tools/llvm-dwarfdump/X86/debug_frame_offset.test b/llvm/test/tools/llvm-dwarfdump/X86/debug_frame_offset.test index 598f80379c38f..27e5dfa622b31 100644 --- a/llvm/test/tools/llvm-dwarfdump/X86/debug_frame_offset.test +++ b/llvm/test/tools/llvm-dwarfdump/X86/debug_frame_offset.test @@ -9,7 +9,7 @@ CHECK-NOT: pc RUN: llvm-dwarfdump %p/../../dsymutil/Inputs/basic1.macho.x86_64.o \ RUN: -eh-frame=0x00000018 | FileCheck %s --check-prefix=EH EH: .eh_frame contents: -EH-NEXT: 00000018 00000024 0000001c FDE cie=00000000 pc=fffffffffffffd00...fffffffffffffd24 +EH-NEXT: 00000018 00000024 0000001c FDE cie=00000000 pc=fffffffffffffd20...fffffffffffffd44 EH-NEXT: Format: DWARF32 EH-NEXT: DW_CFA_advance_loc: 1 EH-NOT: pc diff --git a/llvm/test/tools/llvm-objdump/MachO/eh_frame-arm64.test b/llvm/test/tools/llvm-objdump/MachO/eh_frame-arm64.test index 1768d019597a5..31f87a5035e7e 100644 --- 
a/llvm/test/tools/llvm-objdump/MachO/eh_frame-arm64.test +++ b/llvm/test/tools/llvm-objdump/MachO/eh_frame-arm64.test @@ -12,7 +12,7 @@ # CHECK: DW_CFA_def_cfa: reg31 +0 -# CHECK: 00000014 00000020 00000018 FDE cie=00000000 pc=ffffffffffffffe4...00000004 +# CHECK: 00000014 00000020 00000018 FDE cie=00000000 pc=00000000...00000020 # CHECK: DW_CFA_advance_loc: 8 # CHECK: DW_CFA_def_cfa_offset: +16 # CHECK: DW_CFA_offset: reg30 -8 diff --git a/llvm/test/tools/llvm-readobj/ELF/AArch64/dwarf-cfi.s b/llvm/test/tools/llvm-readobj/ELF/AArch64/dwarf-cfi.s index f129546787602..cefc00171544e 100644 --- a/llvm/test/tools/llvm-readobj/ELF/AArch64/dwarf-cfi.s +++ b/llvm/test/tools/llvm-readobj/ELF/AArch64/dwarf-cfi.s @@ -10,9 +10,10 @@ # CHECK: Program: # CHECK-NEXT: DW_CFA_def_cfa: reg31 +0 +## FIXME Use getEHFrameSection() so that the address is decoded correctly. # CHECK: [0x14] FDE length=16 cie=[0x0] -# CHECK-NEXT: initial_location: 0x0 -# CHECK-NEXT: address_range: 0x4 (end : 0x4) +# CHECK-NEXT: initial_location: 0x1c +# CHECK-NEXT: address_range: 0x4 (end : 0x20) # CHECK: Program: # CHECK-NEXT: DW_CFA_nop: diff --git a/llvm/test/tools/llvm-readobj/ELF/ARM/dwarf-cfi.s b/llvm/test/tools/llvm-readobj/ELF/ARM/dwarf-cfi.s index 36d71d61c9f0a..227ac263b4c6b 100644 --- a/llvm/test/tools/llvm-readobj/ELF/ARM/dwarf-cfi.s +++ b/llvm/test/tools/llvm-readobj/ELF/ARM/dwarf-cfi.s @@ -10,9 +10,10 @@ # CHECK: Program: # CHECK-NEXT: DW_CFA_def_cfa: reg13 +0 +## FIXME Use getEHFrameSection() so that the address is decoded correctly. 
# CHECK: [0x14] FDE length=16 cie=[0x0] -# CHECK-NEXT: initial_location: 0x0 -# CHECK-NEXT: address_range: 0x4 (end : 0x4) +# CHECK-NEXT: initial_location: 0x1c +# CHECK-NEXT: address_range: 0x4 (end : 0x20) # CHECK: Program: # CHECK-NEXT: DW_CFA_nop: From 6d7ec54170f9ef68710f484299caa1a6dd42ff48 Mon Sep 17 00:00:00 2001 From: Anders Waldenborg Date: Sat, 18 Jul 2020 17:26:06 +0200 Subject: [PATCH 722/771] [clang-format] Make sure rst documentation matches comments clang/docs/tools/dump_format_style.py is used to read the comments from clang/include/clang/Format/Format.h and update the contents of clang/docs/ClangFormatStyleOptions.rst Recent changes made these out of date. This commit syncs them by folding the improved wording back to the comments and then regenerating the rst file. Differential Revision: https://reviews.llvm.org/D84103 --- clang/docs/ClangFormatStyleOptions.rst | 9 +++++---- clang/include/clang/Format/Format.h | 11 ++++++++--- 2 files changed, 13 insertions(+), 7 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index e84676760c300..6647b117ac596 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -2694,8 +2694,11 @@ the configuration (without a prefix: ``Auto``). Use tabs whenever we need to fill whitespace that spans at least from one tab stop to the next one. + + **WhitespaceSensitiveMacros** (``std::vector``) - A vector of macros which are whitespace-sensitive and should not be touched. + A vector of macros which are whitespace-sensitive and should not + be touched. These are expected to be macros of the form: @@ -2709,9 +2712,7 @@ the configuration (without a prefix: ``Auto``). WhitespaceSensitiveMacros: ['STRINGIZE', 'PP_STRINGIZE'] - For example: BOOST_PP_STRINGIZE. - - + For example: BOOST_PP_STRINGIZE .. 
END_FORMAT_STYLE_OPTIONS diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 3549ec9eee0e5..7201c11f1158e 100755 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -1425,15 +1425,20 @@ struct FormatStyle { /// For example: TESTSUITE std::vector NamespaceMacros; - /// A vector of macros which are whitespace-sensitive and shouldn't be - /// touched. + /// A vector of macros which are whitespace-sensitive and should not + /// be touched. /// /// These are expected to be macros of the form: /// \code /// STRINGIZE(...) /// \endcode /// - /// For example: STRINGIZE + /// In the .clang-format configuration file, this can be configured like: + /// \code{.yaml} + /// WhitespaceSensitiveMacros: ['STRINGIZE', 'PP_STRINGIZE'] + /// \endcode + /// + /// For example: BOOST_PP_STRINGIZE std::vector WhitespaceSensitiveMacros; tooling::IncludeStyle IncludeStyle; From 7099a4b56bebf44158d413ddb2b4879e9060f9ce Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 18 Jul 2020 09:07:40 -0700 Subject: [PATCH 723/771] [ELF][test] Update test after DW_EH_PE_sdata4/DW_EH_PE_sdata8 change --- lld/test/ELF/eh-frame-hdr-augmentation.s | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lld/test/ELF/eh-frame-hdr-augmentation.s b/lld/test/ELF/eh-frame-hdr-augmentation.s index dbec54a1a8a22..9c9b862ae80d5 100644 --- a/lld/test/ELF/eh-frame-hdr-augmentation.s +++ b/lld/test/ELF/eh-frame-hdr-augmentation.s @@ -20,7 +20,8 @@ // CHECK-NEXT: DW_CFA_nop: // CHECK-NEXT: DW_CFA_nop: -// CHECK: 00000020 00000014 00000024 FDE cie=00000000 pc=00001014...00001014 +/// FIXME Handle relocation correctly +// CHECK: 00000020 00000014 00000024 FDE cie=00000000 pc=0000103c...0000103c // CHECK-NEXT: Format: DWARF32 // CHECK-NEXT: LSDA Address: 000000000000100b // CHECK-NEXT: DW_CFA_nop: From 9548697df9c6f65a3dba4e8de4d015650e73101c Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 18 Jul 2020 16:12:41 +0000 Subject: 
[PATCH 724/771] Fix Markdown format for lists in the Standard Dialect documentation This affects the rendering on the website. --- .../mlir/Dialect/StandardOps/IR/Ops.td | 39 ++++++++++--------- 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index c3e3ada8cd40e..1b525455dd8dc 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -2567,15 +2567,16 @@ def SubViewOp : Std_Op<"subview", [ the operation's offsets, sizes and strides arguments. The SubView operation supports the following arguments: - *) Memref: the "base" memref on which to create a "view" memref. - *) Offsets: memref-rank number of dynamic offsets or static integer - attributes into the "base" memref at which to create the "view" - memref. - *) Sizes: memref-rank number of dynamic sizes or static integer attributes - which specify the sizes of the result "view" memref type. - *) Strides: memref-rank number of dynamic strides or static integer - attributes multiplicatively to the base memref strides in each - dimension. + + * Memref: the "base" memref on which to create a "view" memref. + * Offsets: memref-rank number of dynamic offsets or static integer + attributes into the "base" memref at which to create the "view" + memref. + * Sizes: memref-rank number of dynamic sizes or static integer attributes + which specify the sizes of the result "view" memref type. + * Strides: memref-rank number of dynamic strides or static integer + attributes multiplicatively to the base memref strides in each + dimension. Example 1: @@ -3129,21 +3130,23 @@ def ViewOp : Std_Op<"view", [ The "view" operation extracts an N-D contiguous memref with empty layout map with arbitrary element type from a 1-D contiguous memref with empty layout map of i8 element type. 
The ViewOp supports the following arguments: - *) A single dynamic byte-shift operand must be specified which represents a - a shift of the base 1-D memref pointer from which to create the resulting - contiguous memref view with identity layout. - *) A dynamic size operand that must be specified for each dynamic dimension - in the resulting view memref type. + + * A single dynamic byte-shift operand must be specified which represents a + a shift of the base 1-D memref pointer from which to create the resulting + contiguous memref view with identity layout. + * A dynamic size operand that must be specified for each dynamic dimension + in the resulting view memref type. The "view" operation gives a structured indexing form to a flat 1-D buffer. Unlike "subview" it can perform a type change. The type change behavior requires the op to have special semantics because, e.g. a byte shift of 3 cannot be represented as an offset on f64. For now, a "view" op: - 1) Only takes a contiguous source memref with 0 offset and empty layout. - 2) Must specify a byte_shift operand (in the future, a special integer - attribute may be added to support the folded case). - 3) Returns a contiguous memref with 0 offset and empty layout. + + 1. Only takes a contiguous source memref with 0 offset and empty layout. + 2. Must specify a byte_shift operand (in the future, a special integer + attribute may be added to support the folded case). + 3. Returns a contiguous memref with 0 offset and empty layout. 
Example: From 570a3977de9253bd93171ebd456bfd345d08e4eb Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 18 Jul 2020 16:22:31 +0000 Subject: [PATCH 725/771] Fix dead link on MLIR website --- mlir/include/mlir/Dialect/StandardOps/IR/Ops.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index 1b525455dd8dc..cde317065ffee 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1700,7 +1700,7 @@ def LoadOp : Std_Op<"load", In an `affine.if` or `affine.for` body, the indices of a load are restricted to SSA values bound to surrounding loop induction variables, - [symbols](../Affine.md#dimensions-and-symbols), results of a + [symbols](AffineOps.md#dimensions-and-symbols), results of a [`constant` operation](#stdconstant-constantop), or the result of an `affine.apply` operation that can in turn take as arguments all of the aforementioned SSA values or the recursively result of such an From 9dceb32f300d021bf6ace6d27ffaf670799e2e8d Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sat, 18 Jul 2020 19:41:21 +0300 Subject: [PATCH 726/771] [NFC][CVP] processSDiv(): pacify gcc compilers --- .../Transforms/Scalar/CorrelatedValuePropagation.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index fb7a005708e56..48968166c605f 100644 --- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -697,14 +697,14 @@ static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) { struct Operand { Value *V; - Domain Domain; + Domain D; }; std::array Ops; - for (const auto &I : zip(Ops, SDI->operands())) { + for (const auto I : zip(Ops, SDI->operands())) { Operand &Op = std::get<0>(I); 
Op.V = std::get<1>(I); - Op.Domain = getDomain(Op.V); - if (Op.Domain == Domain::Unknown) + Op.D = getDomain(Op.V); + if (Op.D == Domain::Unknown) return false; } @@ -713,7 +713,7 @@ static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) { // We need operands to be non-negative, so negate each one that isn't. for (Operand &Op : Ops) { - if (Op.Domain == Domain::NonNegative) + if (Op.D == Domain::NonNegative) continue; auto *BO = BinaryOperator::CreateNeg(Op.V, Op.V->getName() + ".nonneg", SDI); @@ -729,7 +729,7 @@ static bool processSDiv(BinaryOperator *SDI, LazyValueInfo *LVI) { Value *Res = UDiv; // If the operands had two different domains, we need to negate the result. - if (Ops[0].Domain != Ops[1].Domain) + if (Ops[0].D != Ops[1].D) Res = BinaryOperator::CreateNeg(Res, Res->getName() + ".neg", SDI); SDI->replaceAllUsesWith(Res); From 3bbbe4c4b6c8e20538a388df164da6f8d935e0cc Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Sat, 18 Jul 2020 12:00:08 -0400 Subject: [PATCH 727/771] [OpenMP] Add Additional Function Attribute Information to OMPKinds.def Summary: This patch adds more function attribute information to the runtime function definitions in OMPKinds.def. The goal is to provide sufficient information about OpenMP runtime functions to perform more optimizations on OpenMP code. 
Reviewers: jdoerfert Subscribers: aaron.ballman cfe-commits yaxunl guansong sstefan1 llvm-commits Tags: #OpenMP #clang #LLVM Differential Revision: https://reviews.llvm.org/D81031 --- clang/test/OpenMP/barrier_codegen.cpp | 2 +- .../include/llvm/Frontend/OpenMP/OMPKinds.def | 675 +++++++++------- llvm/test/Transforms/OpenMP/add_attributes.ll | 745 ++++++++++++------ .../Transforms/OpenMP/parallel_deletion.ll | 2 +- 4 files changed, 903 insertions(+), 521 deletions(-) diff --git a/clang/test/OpenMP/barrier_codegen.cpp b/clang/test/OpenMP/barrier_codegen.cpp index f84a26380df9e..35b2ed7212761 100644 --- a/clang/test/OpenMP/barrier_codegen.cpp +++ b/clang/test/OpenMP/barrier_codegen.cpp @@ -46,7 +46,7 @@ int main(int argc, char **argv) { // IRBUILDER: ; Function Attrs: nounwind // IRBUILDER-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) # // IRBUILDER_OPT: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -// IRBUILDER_OPT-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) # +// IRBUILDER_OPT-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly) # // CHECK: define {{.+}} [[TMAIN_INT]]( // CHECK: [[GTID:%.+]] = call i32 @__kmpc_global_thread_num([[IDENT_T]]* [[LOC]]) diff --git a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def index 0dc2b34f2e4d6..4f2fcb8af5d1d 100644 --- a/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def +++ b/llvm/include/llvm/Frontend/OpenMP/OMPKinds.def @@ -383,7 +383,8 @@ __OMP_RTL(__kmpc_push_proc_bind, false, Void, IdentPtr, Int32, /* Int */ Int32) __OMP_RTL(__kmpc_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32) __OMP_RTL(__kmpc_omp_reg_task_with_affinity, false, Int32, IdentPtr, Int32, - Int8Ptr, Int32, Int8Ptr) + /* kmp_task_t */ VoidPtr, Int32, + /* kmp_task_affinity_info_t */ VoidPtr) __OMP_RTL(omp_get_thread_num, false, Int32, ) 
__OMP_RTL(omp_get_num_threads, false, Int32, ) @@ -430,8 +431,7 @@ __OMP_RTL(__kmpc_reduce, false, Int32, IdentPtr, Int32, Int32, SizeTy, VoidPtr, ReduceFunctionPtr, KmpCriticalNamePtrTy) __OMP_RTL(__kmpc_reduce_nowait, false, Int32, IdentPtr, Int32, Int32, SizeTy, VoidPtr, ReduceFunctionPtr, KmpCriticalNamePtrTy) -__OMP_RTL(__kmpc_end_reduce, false, Void, IdentPtr, Int32, - KmpCriticalNamePtrTy) +__OMP_RTL(__kmpc_end_reduce, false, Void, IdentPtr, Int32, KmpCriticalNamePtrTy) __OMP_RTL(__kmpc_end_reduce_nowait, false, Void, IdentPtr, Int32, KmpCriticalNamePtrTy) @@ -514,10 +514,10 @@ __OMP_RTL(__kmpc_taskloop, false, Void, IdentPtr, /* Int */ Int32, VoidPtr, /* Int */ Int32, Int64, VoidPtr) __OMP_RTL(__kmpc_omp_target_task_alloc, false, /* kmp_task_t */ VoidPtr, IdentPtr, Int32, Int32, SizeTy, SizeTy, TaskRoutineEntryPtr, Int64) -__OMP_RTL(__kmpc_taskred_modifier_init, false, VoidPtr, IdentPtr, - /* Int */ Int32, /* Int */ Int32, /* Int */ Int32, VoidPtr) -__OMP_RTL(__kmpc_taskred_init, false, VoidPtr, /* Int */ Int32, - /* Int */ Int32, VoidPtr) +__OMP_RTL(__kmpc_taskred_modifier_init, false, /* kmp_taskgroup */ VoidPtr, + IdentPtr, /* Int */ Int32, /* Int */ Int32, /* Int */ Int32, VoidPtr) +__OMP_RTL(__kmpc_taskred_init, false, /* kmp_taskgroup */ VoidPtr, + /* Int */ Int32, /* Int */ Int32, VoidPtr) __OMP_RTL(__kmpc_task_reduction_modifier_fini, false, Void, IdentPtr, /* Int */ Int32, /* Int */ Int32) __OMP_RTL(__kmpc_task_reduction_get_th_data, false, VoidPtr, Int32, VoidPtr, @@ -594,7 +594,9 @@ __OMP_RTL(__last, false, Void, ) #undef __OMP_RTL #undef OMP_RTL +#define ParamAttrs(...) ArrayRef({__VA_ARGS__}) #define EnumAttr(Kind) Attribute::get(Ctx, Attribute::AttrKind::Kind) +#define EnumAttrInt(Kind, N) Attribute::get(Ctx, Attribute::AttrKind::Kind, N) #define AttributeSet(...) \ AttributeSet::get(Ctx, ArrayRef({__VA_ARGS__})) @@ -607,19 +609,94 @@ __OMP_RTL(__last, false, Void, ) __OMP_ATTRS_SET(GetterAttrs, OptimisticAttributes ? 
AttributeSet(EnumAttr(NoUnwind), EnumAttr(ReadOnly), - EnumAttr(NoSync), EnumAttr(NoFree), EnumAttr(InaccessibleMemOnly)) + EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(InaccessibleMemOnly), + EnumAttr(WillReturn)) : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(GetterArgWriteAttrs, OptimisticAttributes ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), - EnumAttr(NoFree), EnumAttr(InaccessibleMemOrArgMemOnly)) + EnumAttr(NoFree), + EnumAttr(InaccessibleMemOrArgMemOnly), + EnumAttr(WillReturn)) : AttributeSet(EnumAttr(NoUnwind))) __OMP_ATTRS_SET(SetterAttrs, OptimisticAttributes ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(WriteOnly), - EnumAttr(NoSync), EnumAttr(NoFree), EnumAttr(InaccessibleMemOnly)) + EnumAttr(NoSync), EnumAttr(NoFree), + EnumAttr(InaccessibleMemOnly), + EnumAttr(WillReturn)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(DefaultAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(WillReturn), EnumAttr(NoFree)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(BarrierAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind)) : AttributeSet(EnumAttr(NoUnwind))) +__OMP_ATTRS_SET(InaccessibleArgOnlyAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(InaccessibleMemOrArgMemOnly), + EnumAttr(WillReturn), EnumAttr(NoFree)) + : AttributeSet(EnumAttr(NoUnwind))) + +#if 0 +__OMP_ATTRS_SET(InaccessibleOnlyAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(InaccessibleMemOnly), + EnumAttr(WillReturn), EnumAttr(NoFree)) + : AttributeSet(EnumAttr(NoUnwind))) +#endif + +__OMP_ATTRS_SET(AllocAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoUnwind), EnumAttr(NoSync), + EnumAttr(WillReturn)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(ForkAttrs, OptimisticAttributes + ? 
AttributeSet(EnumAttr(NoUnwind)) + : AttributeSet(EnumAttr(NoUnwind))) + +__OMP_ATTRS_SET(ReadOnlyPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(ReadOnly), EnumAttr(NoFree), + EnumAttr(NoCapture)) + : AttributeSet()) + +#if 0 +__OMP_ATTRS_SET(WriteOnlyPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(WriteOnly), EnumAttr(NoFree), + EnumAttr(NoCapture)) + : AttributeSet()) +#endif + +__OMP_ATTRS_SET(ArgPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoCapture), EnumAttr(NoFree)) + : AttributeSet()) + +__OMP_ATTRS_SET(ReturnPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoAlias)) + : AttributeSet()) + +#if 0 +__OMP_ATTRS_SET(ReturnAlignedPtrAttrs, + OptimisticAttributes + ? AttributeSet(EnumAttr(NoAlias), EnumAttrInt(Alignment, 8), + EnumAttrInt(DereferenceableOrNull, 8)) + : AttributeSet()) +#endif + #undef __OMP_ATTRS_SET #undef OMP_ATTRS_SET @@ -630,295 +707,309 @@ __OMP_ATTRS_SET(SetterAttrs, #define __OMP_RTL_ATTRS(Name, FnAttrSet, RetAttrSet, ArgAttrSets) \ OMP_RTL_ATTRS(OMPRTL_##Name, FnAttrSet, RetAttrSet, ArgAttrSets) -__OMP_RTL_ATTRS(__kmpc_barrier, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_cancel, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_cancel_barrier, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_flush, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_fork_call, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_taskwait, AttributeSet(), AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_taskyield, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_num_threads, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_push_proc_bind, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) 
-__OMP_RTL_ATTRS(__kmpc_serialized_parallel, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_schedule, GetterArgWriteAttrs, AttributeSet(), - ArrayRef( - {AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)), - AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly))})) -__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_barrier, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_cancel, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_cancel_barrier, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_flush, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_global_thread_num, GetterAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_fork_call, ForkAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_taskwait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_taskyield, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_push_num_threads, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) 
+__OMP_RTL_ATTRS(__kmpc_push_proc_bind, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_serialized_parallel, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_serialized_parallel, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_reg_task_with_affinity, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs, + AttributeSet(), ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(omp_get_thread_num, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_num_threads, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_max_threads, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_in_parallel, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_dynamic, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_cancellation, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_nested, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS( + omp_get_schedule, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)), + AttributeSet(EnumAttr(NoCapture), EnumAttr(WriteOnly)))) +__OMP_RTL_ATTRS(omp_get_thread_limit, GetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_get_supported_active_levels, GetterAttrs, AttributeSet(), - {}) -__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_num_places, 
GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), {}) + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_max_active_levels, GetterAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_level, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_ancestor_thread_num, GetterAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_team_size, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_active_level, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_in_final, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_proc_bind, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_num_places, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_num_procs, GetterAttrs, AttributeSet(), ParamAttrs()) __OMP_RTL_ATTRS(omp_get_place_proc_ids, GetterArgWriteAttrs, AttributeSet(), - ArrayRef({AttributeSet(), - AttributeSet(EnumAttr(NoCapture), - EnumAttr(WriteOnly))})) -__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), {}) - -__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), {}) -__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_master, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_master, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_critical, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_critical_with_hint, - 
AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_critical, - AttributeSet(EnumAttr(InaccessibleMemOrArgMemOnly)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_begin, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_reduce, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_reduce_nowait, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_reduce, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_reduce_nowait, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_ordered, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_ordered, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_for_static_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_for_static_fini, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_4u, 
AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_next_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_team_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8u, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_single, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_single, 
AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_omp_task_alloc, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_end_taskgroup, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskgroup, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task_begin_if0, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task_complete_if0, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_task_with_deps, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskloop, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_omp_target_task_alloc, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskred_modifier_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_taskred_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_fini, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_get_th_data, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_init, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_proxy_task_completed_ooo, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_omp_wait_deps, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_cancellationpoint, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_fork_teams, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) 
-__OMP_RTL_ATTRS(__kmpc_push_num_teams, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_copyprivate, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_threadprivate_cached, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_threadprivate_register, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_doacross_init, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_doacross_post, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_doacross_wait, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_doacross_fini, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_alloc, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_free, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_init_allocator, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) - -__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_nowait_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_teams_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_teams_nowait_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_register_requires, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) 
-__OMP_RTL_ATTRS(__tgt_target_data_end_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_update_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_mapper_num_components, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__tgt_push_mapper_component, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) -__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, - AttributeSet(EnumAttr(NoUnwind)), - AttributeSet(), {}) + ParamAttrs(AttributeSet(), AttributeSet(EnumAttr(NoCapture), + EnumAttr(WriteOnly)))) +__OMP_RTL_ATTRS(omp_get_place_num, GetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_partition_num_places, GetterAttrs, AttributeSet(), + ParamAttrs()) +__OMP_RTL_ATTRS(omp_get_partition_place_nums, GetterAttrs, AttributeSet(), + ParamAttrs()) + +__OMP_RTL_ATTRS(omp_set_num_threads, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_dynamic, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_nested, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_schedule, SetterAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(omp_set_max_active_levels, SetterAttrs, AttributeSet(), + ParamAttrs()) + +__OMP_RTL_ATTRS(__kmpc_master, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_master, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_critical, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_critical_with_hint, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + 
AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_critical, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) + +__OMP_RTL_ATTRS(__kmpc_begin, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_reduce, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ReadOnlyPtrAttrs, AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_reduce_nowait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ReadOnlyPtrAttrs, AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_reduce, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_reduce_nowait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) + +__OMP_RTL_ATTRS(__kmpc_ordered, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_ordered, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_for_static_init_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_init_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_init_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_init_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), 
AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_for_static_fini, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_4u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_dispatch_init_8u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_init_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_next_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, 
+ ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_4u, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dispatch_fini_8u, InaccessibleArgOnlyAttrs, + AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_4, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_4u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_8, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_team_static_init_8u, GetterArgWriteAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ArgPtrAttrs, + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_4u, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8, GetterArgWriteAttrs, + AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_dist_for_static_init_8u, GetterArgWriteAttrs, + AttributeSet(), + 
ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, ArgPtrAttrs, + ArgPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_single, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_end_single, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_omp_task_alloc, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_end_taskgroup, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_taskgroup, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task_begin_if0, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task_complete_if0, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_omp_task_with_deps, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ReadOnlyPtrAttrs, AttributeSet(), + ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_taskloop, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), ArgPtrAttrs, ArgPtrAttrs, + AttributeSet(), AttributeSet(), AttributeSet(), + AttributeSet(), AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_omp_target_task_alloc, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + AttributeSet(), AttributeSet(), ReadOnlyPtrAttrs, + AttributeSet())) +__OMP_RTL_ATTRS(__kmpc_taskred_modifier_init, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_taskred_init, DefaultAttrs, AttributeSet(), ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_fini, BarrierAttrs, + 
AttributeSet(), ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_task_reduction_get_th_data, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_task_reduction_init, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_task_reduction_modifier_init, DefaultAttrs, + ReturnPtrAttrs, ParamAttrs()) +__OMP_RTL_ATTRS(__kmpc_proxy_task_completed_ooo, DefaultAttrs, AttributeSet(), + ParamAttrs()) + +__OMP_RTL_ATTRS(__kmpc_omp_wait_deps, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_cancellationpoint, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_fork_teams, ForkAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_push_num_teams, InaccessibleArgOnlyAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_copyprivate, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), AttributeSet(), + ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_threadprivate_cached, DefaultAttrs, ReturnPtrAttrs, + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_threadprivate_register, DefaultAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs, + ReadOnlyPtrAttrs, ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_doacross_init, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_doacross_post, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_doacross_wait, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs, AttributeSet(), ReadOnlyPtrAttrs)) +__OMP_RTL_ATTRS(__kmpc_doacross_fini, BarrierAttrs, AttributeSet(), + ParamAttrs(ReadOnlyPtrAttrs)) + +__OMP_RTL_ATTRS(__kmpc_alloc, DefaultAttrs, ReturnPtrAttrs, {}) +__OMP_RTL_ATTRS(__kmpc_free, AllocAttrs, AttributeSet(), {}) + 
+__OMP_RTL_ATTRS(__kmpc_init_allocator, DefaultAttrs, ReturnPtrAttrs, {}) +__OMP_RTL_ATTRS(__kmpc_destroy_allocator, AllocAttrs, AttributeSet(), {}) + +__OMP_RTL_ATTRS(__kmpc_push_target_tripcount, SetterAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_nowait_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_teams_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_teams_nowait_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_register_requires, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_begin_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_begin_nowait_mapper, ForkAttrs, + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_end_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_end_nowait_mapper, ForkAttrs, + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_update_mapper, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_target_data_update_nowait_mapper, ForkAttrs, + AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_mapper_num_components, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__tgt_push_mapper_component, ForkAttrs, AttributeSet(), {}) +__OMP_RTL_ATTRS(__kmpc_task_allow_completion_event, DefaultAttrs, + ReturnPtrAttrs, ParamAttrs(ReadOnlyPtrAttrs)) #undef __OMP_RTL_ATTRS #undef OMP_RTL_ATTRS #undef AttributeSet #undef EnumAttr +#undef EnumAttrInt +#undef ParamAttrs ///} diff --git a/llvm/test/Transforms/OpenMP/add_attributes.ll b/llvm/test/Transforms/OpenMP/add_attributes.ll index 6e10613ed43c3..e92447d79feac 100644 --- a/llvm/test/Transforms/OpenMP/add_attributes.ll +++ b/llvm/test/Transforms/OpenMP/add_attributes.ll @@ -890,373 +890,373 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; CHECK: ; Function Attrs: nounwind ; CHECK-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #0 -; CHECK-NOT: Function Attrs -; CHECK: declare void 
@__kmpc_barrier(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_barrier(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_cancel(%struct.ident_t*, i32, i32) #0 -; CHECK-NOT: Function Attrs -; CHECK: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_cancel_barrier(%struct.ident_t*, i32) #0 -; CHECK-NOT: Function Attrs -; CHECK: declare void @__kmpc_flush(%struct.ident_t*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_flush(%struct.ident_t*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_fork_call(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
#0 -; CHECK-NOT: Function Attrs -; CHECK: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t*, i32, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t*, i32, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t*, i32, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_master(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_master(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: 
inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_critical(%struct.ident_t*, i32, [8 x i32]*) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t*, i32, [8 x i32]*, i32) #0 -; CHECK: Function Attrs: inaccessiblemem_or_argmemonly -; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_critical(%struct.ident_t*, i32, [8 x i32]*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_begin(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end(%struct.ident_t*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t*, i32, i32, i64, i8*, void (i8*, i8*)*, [8 x i32]*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, i32, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t*, 
i32, [8 x i32]*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t*, i32, [8 x i32]*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_ordered(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void 
@__kmpc_for_static_fini(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_team_static_init_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, i32*, i32*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t*, i32, i32, i32*, i32*, i32*, 
i32*, i32*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t*, i32, i32, i32*, i64*, i64*, i64*, i64*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_single(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_single(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_omp_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void 
@__kmpc_taskgroup(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t*, i32, i32, i32*, i32, i32, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t*, i32, i32, i32*, i64, i64, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t*, i32, i32, i32, i32, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void 
@__kmpc_dispatch_init_8(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t*, i32, i32, i64, i64, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t*, i32, i32*, i32*, i32*, i32*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t*, i32, i32*, i64*, i64*, i64*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, 
i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t*, i32, i8*, i32, i8*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t*, i32, i32, i8*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t*, i32, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
+; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_taskloop(%struct.ident_t*, i32, i8*, i32, i64*, i64*, i64, i32, i32, i64, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_omp_target_task_alloc(%struct.ident_t*, i32, i32, i64, i64, i32 (i32, i8*)*, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_taskred_modifier_init(%struct.ident_t*, i32, i32, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t*, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t*, i32, i64, i8*, void (i8*, i8*)*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* 
@__kmpc_threadprivate_cached(%struct.ident_t*, i32, i8*, i64, i8***) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t*, i8*, i8* (i8*)*, i8* (i8*, i8*)*, void (i8*)*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t*, i32, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_wait(%struct.ident_t*, i32, i64*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t*, i32, i64*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t*, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_alloc(i32, i64, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_free(i32, i8*, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void 
@__kmpc_destroy_allocator(i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_register_requires(i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_register_requires(i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; 
CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i64 @__tgt_mapper_num_components(i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare 
i8* @__kmpc_task_allow_completion_event(%struct.ident_t*, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_init(i32, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) #0 -; CHECK: Function Attrs: nounwind -; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) +; CHECK: ; Function Attrs: nounwind +; CHECK-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) #0 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_num_threads(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_dynamic(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_nested(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_max_active_levels(i32) -; OPTIMISTIC: ; 
Function Attrs: inaccessiblememonly nofree nosync nounwind writeonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly ; OPTIMISTIC-NEXT: declare dso_local void @omp_set_schedule(i32, i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_threads() #1 ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local void @use_int(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_dynamic() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_nested() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_threads() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_num() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_num_procs() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: 
declare dso_local i32 @omp_in_parallel() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_in_final() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_active_level() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_level() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_ancestor_thread_num(i32) #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_team_size(i32) #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_thread_limit() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_max_active_levels() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn ; OPTIMISTIC-NEXT: declare 
dso_local void @omp_get_schedule(i32* nocapture writeonly, i32* nocapture writeonly) #2 ; OPTIMISTIC-NOT: Function Attrs @@ -1418,8 +1418,299 @@ declare void @__kmpc_proxy_task_completed_ooo(i8*) ; OPTIMISTIC-NOT: Function Attrs ; OPTIMISTIC: declare dso_local i32 @omp_pause_resource_all(i32) -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn ; OPTIMISTIC-NEXT: declare dso_local i32 @omp_get_supported_active_levels() #1 -; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly -; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t*) +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind readonly willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_global_thread_num(%struct.ident_t* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_fork_call(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...) 
+ +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskwait(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_taskyield(%struct.ident_t* nocapture nofree readonly, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_threads(%struct.ident_t* nocapture nofree readonly, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_push_proc_bind(%struct.ident_t* nocapture nofree readonly, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_end_serialized_parallel(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_master(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_end_master(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_critical_with_hint(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void 
@__kmpc_end_critical(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_begin(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_end(%struct.ident_t* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__kmpc_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, [8 x i32]*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_end_reduce_nowait(%struct.ident_t* nocapture nofree readonly, i32, [8 x i32]*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_ordered(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_end_ordered(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* 
nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_for_static_fini(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_team_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void 
@__kmpc_team_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_for_static_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64, i64) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__kmpc_single(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_end_single(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_omp_task_alloc(%struct.ident_t* nocapture nofree 
readonly, i32, i32, i64, i64, i32 (i32, i8*)* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task(%struct.ident_t* nocapture nofree readonly, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_end_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_taskgroup(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_4u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i32, i32, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dist_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i32* nocapture nofree, i64, i64, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i32, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_4u(%struct.ident_t* nocapture nofree 
readonly, i32, i32, i32, i32, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_init_8u(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i64, i64) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_4u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree, i32* nocapture nofree) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_dispatch_next_8u(%struct.ident_t* nocapture nofree readonly, i32, i32* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree, i64* nocapture nofree) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; 
OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_4u(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_dispatch_fini_8u(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_begin_if0(%struct.ident_t* nocapture nofree readonly, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_omp_task_complete_if0(%struct.ident_t* nocapture nofree readonly, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_omp_task_with_deps(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i8* nocapture nofree readonly, i32, i8* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_omp_wait_deps(%struct.ident_t* nocapture nofree readonly, i32, i32, i8* nocapture nofree readonly, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i32 @__kmpc_cancellationpoint(%struct.ident_t* nocapture nofree readonly, i32, i32) + +; OPTIMISTIC: ; Function Attrs: inaccessiblemem_or_argmemonly nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_push_num_teams(%struct.ident_t* nocapture nofree readonly, i32, i32, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_fork_teams(%struct.ident_t* nocapture nofree readonly, i32, void (i32*, i32*, ...)* nocapture nofree readonly, ...) 
+ +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_taskloop(%struct.ident_t* nocapture nofree readonly, i32, i8*, i32, i64* nocapture nofree, i64* nocapture nofree, i64, i32, i32, i64, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_omp_target_task_alloc(%struct.ident_t* nocapture nofree readonly, i32, i32, i64, i64, i32 (i32, i8*)* nocapture nofree readonly, i64) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_taskred_modifier_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare i8* @__kmpc_taskred_init(i32, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_task_reduction_modifier_fini(%struct.ident_t* nocapture nofree readonly, i32, i32) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_copyprivate(%struct.ident_t* nocapture nofree readonly, i32, i64, i8* nocapture nofree readonly, void (i8*, i8*)*, i32) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_threadprivate_cached(%struct.ident_t* nocapture nofree readonly, i32, i8*, i64, i8***) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_threadprivate_register(%struct.ident_t* nocapture nofree readonly, i8*, i8* (i8*)* nocapture nofree readonly, i8* (i8*, i8*)* nocapture nofree readonly, void (i8*)* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_init(%struct.ident_t* nocapture nofree readonly, i32, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void 
@__kmpc_doacross_wait(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_post(%struct.ident_t* nocapture nofree readonly, i32, i64* nocapture nofree readonly) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__kmpc_doacross_fini(%struct.ident_t* nocapture nofree readonly, i32) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_alloc(i32, i64, i8*) + +; OPTIMISTIC: ; Function Attrs: nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_free(i32, i8*, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_init_allocator(i32, i8*, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_destroy_allocator(i32, i8*) + +; OPTIMISTIC: ; Function Attrs: inaccessiblememonly nofree nosync nounwind willreturn writeonly +; OPTIMISTIC-NEXT: declare void @__kmpc_push_target_tripcount(i64, i64) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__tgt_target_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__tgt_target_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i32 @__tgt_target_teams_nowait_mapper(i64, i8*, i32, i8**, i8**, i64*, i64*, i8**, i32, i32) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_register_requires(i64) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_mapper(i64, i32, i8**, i8**, 
i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_begin_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_end_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_target_data_update_nowait_mapper(i64, i32, i8**, i8**, i64*, i64*, i8**) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare i64 @__tgt_mapper_num_components(i8*) + +; OPTIMISTIC: ; Function Attrs: nounwind +; OPTIMISTIC-NEXT: declare void @__tgt_push_mapper_component(i8*, i8*, i8*, i64, i64) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_allow_completion_event(%struct.ident_t* nocapture nofree readonly, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_get_th_data(i32, i8*, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_init(i32, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare noalias i8* @__kmpc_task_reduction_modifier_init(i8*, i32, i32, i32, i8*) + +; OPTIMISTIC: ; Function Attrs: nofree nosync nounwind willreturn +; OPTIMISTIC-NEXT: declare void @__kmpc_proxy_task_completed_ooo(i8*) diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll index 4d2f8e7cbc5e8..07976660546f8 100644 
--- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -393,7 +393,7 @@ entry: define internal void @.omp.reduction.reduction_func(i8* %arg, i8* %arg1) { ; CHECK-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func -; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #10 +; CHECK-SAME: (i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG:%.*]], i8* nocapture nofree nonnull readonly align 8 dereferenceable(8) [[ARG1:%.*]]) #{{[0-9]+}} ; CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP:%.*]] = bitcast i8* [[ARG1]] to i32** ; CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[TMP]], align 8 From 0b2a92224630f6e177d091b8391cfa943764aba5 Mon Sep 17 00:00:00 2001 From: Artem Dergachev Date: Fri, 17 Jul 2020 19:42:20 -0700 Subject: [PATCH 728/771] [analyzer] scan-build: Fix silencing multiple core checkers. It was only silencing one checker because -analyzer-config flags can only carry one value at a time. --- .../null_dereference_and_division_by_zero.c | 8 +++++ .../scan-build/silence-core-checkers.test | 30 +++++++++++++++++++ clang/tools/scan-build/bin/scan-build | 10 ++++--- 3 files changed, 44 insertions(+), 4 deletions(-) create mode 100644 clang/test/Analysis/scan-build/Inputs/null_dereference_and_division_by_zero.c create mode 100644 clang/test/Analysis/scan-build/silence-core-checkers.test diff --git a/clang/test/Analysis/scan-build/Inputs/null_dereference_and_division_by_zero.c b/clang/test/Analysis/scan-build/Inputs/null_dereference_and_division_by_zero.c new file mode 100644 index 0000000000000..438af79c90bbd --- /dev/null +++ b/clang/test/Analysis/scan-build/Inputs/null_dereference_and_division_by_zero.c @@ -0,0 +1,8 @@ +int test(int x) { + if (x) { + int *p = 0; + return *p; // Null dereference. + } else { + return 1 / x; // Division by zero. 
+ } +} diff --git a/clang/test/Analysis/scan-build/silence-core-checkers.test b/clang/test/Analysis/scan-build/silence-core-checkers.test new file mode 100644 index 0000000000000..6d9a3017fcd61 --- /dev/null +++ b/clang/test/Analysis/scan-build/silence-core-checkers.test @@ -0,0 +1,30 @@ +// FIXME: Actually, "perl". +REQUIRES: shell + +RUN: rm -rf %t.output_dir && mkdir %t.output_dir +RUN: %scan-build -o %t.output_dir \ +RUN: %clang -S %S/Inputs/null_dereference_and_division_by_zero.c \ +RUN: | FileCheck %s -check-prefix CHECK-TWO-BUGS + +RUN: rm -rf %t.output_dir && mkdir %t.output_dir +RUN: %scan-build -o %t.output_dir \ +RUN: -disable-checker core.DivideZero \ +RUN: %clang -S %S/Inputs/null_dereference_and_division_by_zero.c \ +RUN: | FileCheck %s -check-prefix CHECK-ONE-BUG + +RUN: rm -rf %t.output_dir && mkdir %t.output_dir +RUN: %scan-build -o %t.output_dir \ +RUN: -disable-checker core.NullDereference \ +RUN: %clang -S %S/Inputs/null_dereference_and_division_by_zero.c \ +RUN: | FileCheck %s -check-prefix CHECK-ONE-BUG + +RUN: rm -rf %t.output_dir && mkdir %t.output_dir +RUN: %scan-build -o %t.output_dir \ +RUN: -disable-checker core.NullDereference \ +RUN: -disable-checker core.DivideZero \ +RUN: %clang -S %S/Inputs/null_dereference_and_division_by_zero.c \ +RUN: | FileCheck %s -check-prefix CHECK-NO-BUGS + +CHECK-NO-BUGS: scan-build: No bugs found. +CHECK-ONE-BUG: scan-build: 1 bug found. +CHECK-TWO-BUGS: scan-build: 2 bugs found. 
diff --git a/clang/tools/scan-build/bin/scan-build b/clang/tools/scan-build/bin/scan-build index 11334a0b96269..aed8c417b6ccd 100755 --- a/clang/tools/scan-build/bin/scan-build +++ b/clang/tools/scan-build/bin/scan-build @@ -1973,11 +1973,13 @@ my $CCC_ANALYZER_ANALYSIS = join ' ', @AnalysesToRun; my $CCC_ANALYZER_PLUGINS = join ' ', map { "-load ".$_ } @{$Options{PluginsToLoad}}; my $CCC_ANALYZER_CONFIG = join ' ', map { "-analyzer-config ".$_ } @{$Options{ConfigOptions}}; -foreach (sort { $Options{SilenceCheckers}{$a} <=> $Options{SilenceCheckers}{$b} } - keys %{$Options{SilenceCheckers}}) { - # Add checkers in order they were silenced. +if (%{$Options{SilenceCheckers}}) { $CCC_ANALYZER_CONFIG = - $CCC_ANALYZER_CONFIG." -analyzer-config silence-checkers=".$_; + $CCC_ANALYZER_CONFIG." -analyzer-config silence-checkers=" + .join(';', sort { + $Options{SilenceCheckers}{$a} <=> + $Options{SilenceCheckers}{$b} + } keys %{$Options{SilenceCheckers}}); } my %EnvVars = ( From e79a86e45b21cacc57e0cb8bd40d137d8768fc26 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Fri, 17 Jul 2020 11:14:28 -0700 Subject: [PATCH 729/771] [flang] Insert leading blanks in LOGICAL formatted output fields Summary: For Lw output editing, emit (w-1) blanks before the T or the F. Reviewed By: sscalpone, PeteSteinfeld Differential Revision: https://reviews.llvm.org/D84059 --- flang/runtime/edit-output.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp index 941c5ccf3d593..4680c81129ed2 100644 --- a/flang/runtime/edit-output.cpp +++ b/flang/runtime/edit-output.cpp @@ -424,7 +424,8 @@ bool EditLogicalOutput(IoStatementState &io, const DataEdit &edit, bool truth) { switch (edit.descriptor) { case 'L': case 'G': - return io.Emit(truth ? "T" : "F", 1); + return io.EmitRepeated(' ', std::max(0, edit.width.value_or(1) - 1)) && + io.Emit(truth ? 
"T" : "F", 1); default: io.GetIoErrorHandler().SignalError(IostatErrorInFormat, "Data edit descriptor '%c' may not be used with a LOGICAL data item", From ca1cc5c4e067d496e92e775bd0ac30c1b775bb62 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 18 Jul 2020 11:10:18 -0700 Subject: [PATCH 730/771] [gcov][test] Call wait() to make gcov-fork.c reliable If the parent exit before the child, the line counts might be 1. next:18'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 26: 1: 21: if (fork() == -1) return 1; // CHECK-NEXT: 1: [[#@LINE]]: next:18'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 27: 1: 22: func2(); // CHECK-NEXT: 2: [[#@LINE]]: next:18'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 28: 1: 23: return 0; // CHECK-NEXT: 2: [[#@LINE]]: next:18'0 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ --- compiler-rt/test/profile/Posix/gcov-fork.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/compiler-rt/test/profile/Posix/gcov-fork.c b/compiler-rt/test/profile/Posix/gcov-fork.c index 4942d5ac92888..022ce716a8dcb 100644 --- a/compiler-rt/test/profile/Posix/gcov-fork.c +++ b/compiler-rt/test/profile/Posix/gcov-fork.c @@ -1,10 +1,6 @@ /// A basic block with fork/exec* is split. .gcda is flushed immediately before /// fork/exec* so the lines before fork are counted once while succeeding /// lines are counted twice. 
-// UNSUPPORTED: darwin -/// FIXME: http://lab.llvm.org:8011/builders/clang-ppc64be-linux/builds/50913 -// UNSUPPORTED: host-byteorder-big-endian - // RUN: mkdir -p %t.d && cd %t.d // RUN: %clang --coverage %s -o %t // RUN: test -f gcov-fork.gcno @@ -17,8 +13,12 @@ void func1() {} // CHECK: 1: [[#@LINE]]:void func1() void func2() {} // CHECK-NEXT: 2: [[#@LINE]]: int main(void) { // CHECK-NEXT: 1: [[#@LINE]]: + int status; // CHECK-NEXT: -: [[#@LINE]]: func1(); // CHECK-NEXT: 1: [[#@LINE]]: - if (fork() == -1) return 1; // CHECK-NEXT: 1: [[#@LINE]]: + pid_t pid = fork(); // CHECK-NEXT: 1: [[#@LINE]]: + if (pid == -1) return 1; // CHECK-NEXT: 2: [[#@LINE]]: + if (pid) // CHECK-NEXT: 2: [[#@LINE]]: + wait(&status); // CHECK-NEXT: 1: [[#@LINE]]: func2(); // CHECK-NEXT: 2: [[#@LINE]]: return 0; // CHECK-NEXT: 2: [[#@LINE]]: } From cbff0c75b9ea120bc6ec1ecc2e8d431fd6143236 Mon Sep 17 00:00:00 2001 From: peter klausler Date: Fri, 17 Jul 2020 11:17:39 -0700 Subject: [PATCH 731/771] [flang] Improve output from a STOP statement Add a missing newline to IEEE FP flag formatting, and don't neglect to emit STOP when there's no code number. Reviewed By: tskeith Differential Revision: https://reviews.llvm.org/D84060 --- flang/runtime/stop.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/flang/runtime/stop.cpp b/flang/runtime/stop.cpp index 49592b75a2ade..3b8c1385293ad 100644 --- a/flang/runtime/stop.cpp +++ b/flang/runtime/stop.cpp @@ -40,6 +40,7 @@ static void DescribeIEEESignaledExceptions() { if (excepts & FE_UNDERFLOW) { std::fputs(" UNDERFLOW", stderr); } + std::fputc('\n', stderr); } } @@ -52,10 +53,11 @@ static void CloseAllExternalUnits(const char *why) { int code, bool isErrorStop, bool quiet) { CloseAllExternalUnits("STOP statement"); if (!quiet) { + std::fprintf(stderr, "Fortran %s", isErrorStop ? "ERROR STOP" : "STOP"); if (code != EXIT_SUCCESS) { - std::fprintf(stderr, "Fortran %s: code %d\n", - isErrorStop ? 
"ERROR STOP" : "STOP", code); + std::fprintf(stderr, ": code %d\n", code); } + std::fputc('\n', stderr); DescribeIEEESignaledExceptions(); } std::exit(code); From 32db24a7f24236d78beaeb5cfd96b115d67a5c21 Mon Sep 17 00:00:00 2001 From: Bruno Ricci Date: Sat, 18 Jul 2020 20:35:16 +0100 Subject: [PATCH 732/771] [clang] Provide a more specific diagnostic for a misplaced lambda capture-default. Currently a capture-default which is not the first element in the lambda-capture is diagnosed with a generic expected variable name or 'this' in lambda capture list, which is true but not very helpful. If we don't have already parsed a capture-default then a lone "&" or "=" is likely to be a misplaced capture-default, so diagnose it as such. Differential Revision: https://reviews.llvm.org/D83681 Reviewed By: aaron.ballman --- .../clang/Basic/DiagnosticParseKinds.td | 2 + clang/lib/Parse/ParseExprCXX.cpp | 9 +++++ .../lambda-misplaced-capture-default.cpp | 38 +++++++++++++++++++ 3 files changed, 49 insertions(+) create mode 100644 clang/test/Parser/lambda-misplaced-capture-default.cpp diff --git a/clang/include/clang/Basic/DiagnosticParseKinds.td b/clang/include/clang/Basic/DiagnosticParseKinds.td index 1038a4119d4cb..a10191e91be38 100644 --- a/clang/include/clang/Basic/DiagnosticParseKinds.td +++ b/clang/include/clang/Basic/DiagnosticParseKinds.td @@ -934,6 +934,8 @@ def err_lambda_capture_misplaced_ellipsis : Error< "the name of the capture">; def err_lambda_capture_multiple_ellipses : Error< "multiple ellipses in pack capture">; +def err_capture_default_first : Error< + "capture default must be first">; // C++17 lambda expressions def err_expected_star_this_capture : Error< "expected 'this' following '*' in lambda capture list">; diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index aa35200c33b66..b225bb7c8b36c 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -926,6 +926,15 @@ bool 
Parser::ParseLambdaIntroducer(LambdaIntroducer &Intro, } else if (Tok.is(tok::kw_this)) { Kind = LCK_This; Loc = ConsumeToken(); + } else if (Tok.isOneOf(tok::amp, tok::equal) && + NextToken().isOneOf(tok::comma, tok::r_square) && + Intro.Default == LCD_None) { + // We have a lone "&" or "=" which is either a misplaced capture-default + // or the start of a capture (in the "&" case) with the rest of the + // capture missing. Both are an error but a misplaced capture-default + // is more likely if we don't already have a capture default. + return Invalid( + [&] { Diag(Tok.getLocation(), diag::err_capture_default_first); }); } else { TryConsumeToken(tok::ellipsis, EllipsisLocs[0]); diff --git a/clang/test/Parser/lambda-misplaced-capture-default.cpp b/clang/test/Parser/lambda-misplaced-capture-default.cpp new file mode 100644 index 0000000000000..d65b875102da7 --- /dev/null +++ b/clang/test/Parser/lambda-misplaced-capture-default.cpp @@ -0,0 +1,38 @@ +// RUN: %clang_cc1 -std=c++20 -Wno-unused-value -fsyntax-only -verify %s + +namespace misplaced_capture_default { +void Test() { + int i = 0; + [&, i, &] {}; // expected-error {{expected variable name or 'this' in lambda capture list}} + [&, i, = ] {}; // expected-error {{expected variable name or 'this' in lambda capture list}} + [=, &i, &] {}; // expected-error {{expected variable name or 'this' in lambda capture list}} + [=, &i, = ] {}; // expected-error {{expected variable name or 'this' in lambda capture list}} + + [i, &] {}; // expected-error {{capture default must be first}} + [i, = ] {}; // expected-error {{capture default must be first}} + [i, = x] {}; // expected-error {{expected variable name or 'this' in lambda capture list}} + [=, &i] {}; // ok + [&, &i] {}; // expected-error {{'&' cannot precede a capture when the capture default is '&'}} + [&x = i] {}; // ok + [=, &x = i] {}; // ok + [x = &i] {}; // ok + [=, &x = &i] {}; // expected-error {{non-const lvalue reference to type 'int *' cannot bind to a 
temporary of type 'int *'}} + [&, this] {}; // expected-error {{'this' cannot be captured in this context}} + + [i, &, x = 2] {}; // expected-error {{capture default must be first}} + [i, =, x = 2] {}; // expected-error {{capture default must be first}} +} +} // namespace misplaced_capture_default + +namespace misplaced_capture_default_pack { +template void Test(Args... args) { + [&, args...] {}; // ok + [args..., &] {}; // expected-error {{capture default must be first}} + [=, &args...] {}; // ok + [&, ... xs = &args] {}; // ok + [&, ... xs = &] {}; // expected-error {{expected expression}} + [... xs = &] {}; // expected-error {{expected expression}} + [... xs = &args, = ] {}; // expected-error {{capture default must be first}} + [... xs = &args, &] {}; // expected-error {{capture default must be first}} +} +} // namespace misplaced_capture_default_pack From be8e5fee91b44522056f1e780cdc861427f8738f Mon Sep 17 00:00:00 2001 From: Bruno Ricci Date: Sat, 18 Jul 2020 20:39:16 +0100 Subject: [PATCH 733/771] [clang][NFC] Tests showing the problems with some uses of NamedDecl::getDeclName in diagnostics, SemaExpr.cpp part --- .../dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp | 14 +++++++++++++- clang/test/SemaCXX/attr-unused.cpp | 10 ++++++++++ clang/test/SemaCXX/default2.cpp | 6 ++++++ clang/test/SemaCXX/incomplete-call.cpp | 6 ++++-- clang/test/SemaCXX/lambda-expressions.cpp | 11 +++++++++++ 5 files changed, 44 insertions(+), 3 deletions(-) diff --git a/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp index af2e7cf09ceb4..52986faa4e859 100644 --- a/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp +++ b/clang/test/CXX/dcl.decl/dcl.meaning/dcl.fct.default/p7.cpp @@ -1,4 +1,4 @@ -// RUN: %clang_cc1 -fsyntax-only -verify %s +// RUN: %clang_cc1 -std=c++17 -fsyntax-only -verify %s void h() { int i1 = 0; @@ -16,4 +16,16 @@ void h() { const int i4 = 0; extern void h4(int x = sizeof(i4)); // ok, not 
odr-use extern void h5(int x = decltype(i4 + 4)()); // ok, not odr-use + + union { + int i5; + }; + + extern void h6(int = i5); + // expected-error@-1 {{default argument references local variable '' of enclosing function}} + + struct S { int i; }; + auto [x] = S(); + + extern void h7(int = x); // FIXME: reject } diff --git a/clang/test/SemaCXX/attr-unused.cpp b/clang/test/SemaCXX/attr-unused.cpp index b74bc915ce070..e3878152eca97 100644 --- a/clang/test/SemaCXX/attr-unused.cpp +++ b/clang/test/SemaCXX/attr-unused.cpp @@ -3,7 +3,17 @@ namespace ns_unused { typedef int Int_unused __attribute__((unused)); } namespace ns_not_unused { typedef int Int_not_unused; } +template class C; +template <> class __attribute__((unused)) C {}; + void f() { ns_not_unused::Int_not_unused i1; // expected-warning {{unused variable}} ns_unused::Int_unused i0; // expected-warning {{'Int_unused' was marked unused but was used}} + + union __attribute__((unused)) { // expected-warning {{'' was marked unused but was used}} + int i; + }; + (void) i; + + C(); // expected-warning {{'C' was marked unused but was used}} } diff --git a/clang/test/SemaCXX/default2.cpp b/clang/test/SemaCXX/default2.cpp index 4c8e8ce6941a4..7651233f8636f 100644 --- a/clang/test/SemaCXX/default2.cpp +++ b/clang/test/SemaCXX/default2.cpp @@ -117,6 +117,12 @@ class C2 { static int f(int = 10); // expected-note{{default argument declared here}} }; +template class C3; +template <> class C3 { + static void g(int = f()); // expected-error {{use of default argument to function 'f' that is declared later in class 'C3'}} + static int f(int = 10); // expected-note {{default argument declared here}} +}; + // Make sure we actually parse the default argument for an inline definition class XX { void A(int length = -1 ) { } diff --git a/clang/test/SemaCXX/incomplete-call.cpp b/clang/test/SemaCXX/incomplete-call.cpp index 0fb1ef5f07a50..46f470e4a8810 100644 --- a/clang/test/SemaCXX/incomplete-call.cpp +++ 
b/clang/test/SemaCXX/incomplete-call.cpp @@ -1,7 +1,8 @@ // RUN: %clang_cc1 -fsyntax-only -verify %s -struct A; // expected-note 14 {{forward declaration of 'A'}} +struct A; // expected-note 15 {{forward declaration of 'A'}} A f(); // expected-note {{'f' declared here}} +template A ft(T); // expected-note {{'ft' declared here}} struct B { A f(); // expected-note {{'f' declared here}} @@ -38,7 +39,8 @@ void g() { A (B::*mfp)() = 0; (b.*mfp)(); // expected-error {{calling function with incomplete return type 'A'}} - + + ft(42); // expected-error {{calling 'ft' with incomplete return type 'A'}} } diff --git a/clang/test/SemaCXX/lambda-expressions.cpp b/clang/test/SemaCXX/lambda-expressions.cpp index 3240d5351fc5b..7f7f9c5704872 100644 --- a/clang/test/SemaCXX/lambda-expressions.cpp +++ b/clang/test/SemaCXX/lambda-expressions.cpp @@ -649,3 +649,14 @@ void Run(const int& points) { void operator_parens() { [&](int x){ operator()(); }(0); // expected-error {{undeclared 'operator()'}} } + +namespace captured_name { +void Test() { + union { // expected-note {{'' declared here}} + int i; + }; + [] { return i; }; // expected-error {{variable '' cannot be implicitly captured in a lambda with no capture-default specified}} + // expected-note@-1 {{lambda expression begins here}} + +} +}; From acf3bdc283ecf6e2c3a85a391a24becc4814b8b8 Mon Sep 17 00:00:00 2001 From: Bruno Ricci Date: Sat, 18 Jul 2020 20:44:06 +0100 Subject: [PATCH 734/771] [clang][NFC] Tests showing the problems with some uses of NamedDecl::getDeclName in diagnostics, SemaOverload.cpp+SemaStmt.cpp part --- .../dcl.dcl/dcl.attr/dcl.attr.noreturn/p1.cpp | 6 ++++ clang/test/Sema/return-non-void.c | 5 +++ .../SemaCXX/constant-expression-cxx11.cpp | 5 +++ clang/test/SemaCXX/consteval-return-void.cpp | 10 ++++++ clang/test/SemaCXX/return-void.cpp | 26 +++++++++++++++ clang/test/SemaCXX/return.cpp | 12 ++++++- .../warn-pure-virtual-call-from-ctor-dtor.cpp | 32 +++++++++++++++++++ 
clang/test/SemaCXX/warn-pure-virtual-kext.cpp | 12 +++++++ clang/test/SemaObjC/method-return-void.m | 9 ++++++ 9 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 clang/test/Sema/return-non-void.c create mode 100644 clang/test/SemaCXX/consteval-return-void.cpp create mode 100644 clang/test/SemaCXX/return-void.cpp create mode 100644 clang/test/SemaObjC/method-return-void.m diff --git a/clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.noreturn/p1.cpp b/clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.noreturn/p1.cpp index 59cac367dbf23..0d4d34ac0e147 100644 --- a/clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.noreturn/p1.cpp +++ b/clang/test/CXX/dcl.dcl/dcl.attr/dcl.attr.noreturn/p1.cpp @@ -7,6 +7,12 @@ void a2 [[noreturn]] () { return; // expected-warning {{function 'a2' declared 'noreturn' should not return}} } +template void a3 [[noreturn]] () {} +template <> void a3 () { return; } // expected-warning {{function 'a3' declared 'noreturn' should not return}} + +template void a4 [[noreturn]] () { return; } // expected-warning 2{{function 'a4' declared 'noreturn' should not return}} +void a4_test() { a4(); } // expected-note {{in instantiation of function template specialization 'a4' requested here}} + [[noreturn, noreturn]] void b() { throw 0; } // expected-error {{attribute 'noreturn' cannot appear multiple times in an attribute specifier}} [[noreturn]] [[noreturn]] void b2() { throw 0; } // ok diff --git a/clang/test/Sema/return-non-void.c b/clang/test/Sema/return-non-void.c new file mode 100644 index 0000000000000..f1ee3722af489 --- /dev/null +++ b/clang/test/Sema/return-non-void.c @@ -0,0 +1,5 @@ +// RUN: %clang_cc1 -Wreturn-type -std=c99 -fsyntax-only -verify=c99 %s +// RUN: %clang_cc1 -Wreturn-type -std=c90 -fsyntax-only -verify=c90 %s + +int foo(void) { return; } // c99-error {{non-void function 'foo' should return a value}} + // c90-error@-1 {{non-void function 'foo' should return a value}} diff --git a/clang/test/SemaCXX/constant-expression-cxx11.cpp 
b/clang/test/SemaCXX/constant-expression-cxx11.cpp index b69bcb2fef9d0..7ff260c37c698 100644 --- a/clang/test/SemaCXX/constant-expression-cxx11.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx11.cpp @@ -2167,6 +2167,11 @@ namespace PR21786 { namespace PR21859 { constexpr int Fun() { return; } // expected-error {{non-void constexpr function 'Fun' should return a value}} constexpr int Var = Fun(); + + template constexpr int FunT1() { return; } // expected-error {{non-void constexpr function 'FunT1' should return a value}} + template constexpr int FunT2() { return 0; } + template <> constexpr int FunT2() { return 0; } + template <> constexpr int FunT2() { return; } // expected-error {{non-void constexpr function 'FunT2' should return a value}} } struct InvalidRedef { diff --git a/clang/test/SemaCXX/consteval-return-void.cpp b/clang/test/SemaCXX/consteval-return-void.cpp new file mode 100644 index 0000000000000..a5207f41bf2c7 --- /dev/null +++ b/clang/test/SemaCXX/consteval-return-void.cpp @@ -0,0 +1,10 @@ +// RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s + +consteval int Fun() { return; } // expected-error {{non-void constexpr function 'Fun' should return a value}} + +// FIXME: The diagnostic is wrong; should be "consteval". 
+ +template consteval int FunT1() { return; } // expected-error {{non-void constexpr function 'FunT1' should return a value}} +template consteval int FunT2() { return 0; } +template <> consteval int FunT2() { return 0; } +template <> consteval int FunT2() { return; } // expected-error {{non-void constexpr function 'FunT2' should return a value}} diff --git a/clang/test/SemaCXX/return-void.cpp b/clang/test/SemaCXX/return-void.cpp new file mode 100644 index 0000000000000..b3aa203133dc3 --- /dev/null +++ b/clang/test/SemaCXX/return-void.cpp @@ -0,0 +1,26 @@ +// RUN: %clang_cc1 %s -std=c++11 -fsyntax-only -verify + +void f1() { return {1,2}; } // expected-error {{void function 'f1' must not return a value}} + +template void f2() { return {1,2}; } // expected-error {{void function 'f2' must not return a value}} + +template <> void f2() { return {1,2}; } // expected-error {{void function 'f2' must not return a value}} + +void test_f2() { + f2(); + f2(); +} + +struct S { + void f3() { return {1,2}; } // expected-error {{void function 'f3' must not return a value}} + S() { return {1,2}; } // expected-error {{constructor 'S' must not return a value}} + ~S() { return {1,2}; } // expected-error {{destructor '~S' must not return a value}} +}; + +template struct ST { + void f4() { return {1,2}; } // expected-error {{void function 'f4' must not return a value}} + ST() { return {1,2}; } // expected-error {{constructor 'ST' must not return a value}} + ~ST() { return {1,2}; } // expected-error {{destructor '~ST' must not return a value}} +}; + +ST st; diff --git a/clang/test/SemaCXX/return.cpp b/clang/test/SemaCXX/return.cpp index db289240d1ce6..1550d009b0617 100644 --- a/clang/test/SemaCXX/return.cpp +++ b/clang/test/SemaCXX/return.cpp @@ -108,9 +108,19 @@ namespace return_has_expr { namespace ctor_returns_void { void f() {} struct S { - S() { return f(); }; // expected-error {{constructor 'S' must not return void expression}} + S() { return f(); } // expected-error {{constructor 
'S' must not return void expression}} ~S() { return f(); } // expected-error {{destructor '~S' must not return void expression}} }; + + template struct ST { + ST() { return f(); } // expected-error {{constructor 'ST' must not return void expression}} + // expected-error@-1 {{constructor 'ST' must not return void expression}} + ~ST() { return f(); } // expected-error {{destructor '~ST' must not return void expression}} + // expected-error@-1 {{destructor '~ST' must not return void expression}} + }; + + ST st; // expected-note {{in instantiation of member function 'ctor_returns_void::ST::ST'}} + // expected-note@-1 {{in instantiation of member function 'ctor_returns_void::ST::~ST'}} } void cxx_unresolved_expr() { diff --git a/clang/test/SemaCXX/warn-pure-virtual-call-from-ctor-dtor.cpp b/clang/test/SemaCXX/warn-pure-virtual-call-from-ctor-dtor.cpp index 3312b5635f062..789935e3470ac 100644 --- a/clang/test/SemaCXX/warn-pure-virtual-call-from-ctor-dtor.cpp +++ b/clang/test/SemaCXX/warn-pure-virtual-call-from-ctor-dtor.cpp @@ -20,3 +20,35 @@ struct C { C::f(); } }; + +template struct TA { + TA() { f(); } // expected-warning {{call to pure virtual member function 'f' has undefined behavior; overrides of 'f' in subclasses are not available in the constructor of 'TA'}} + ~TA() { f(); } // expected-warning {{call to pure virtual member function 'f' has undefined behavior; overrides of 'f' in subclasses are not available in the destructor of 'TA'}} + + virtual void f() = 0; // expected-note 2{{'f' declared here}} +}; + +template <> struct TA { + TA() { f(); } + ~TA() { f(); } + void f(); +}; + +template <> struct TA { + TA() { f(); } // expected-warning {{call to pure virtual member function 'f' has undefined behavior; overrides of 'f' in subclasses are not available in the constructor of 'TA'}} + ~TA() { f(); } // expected-warning {{call to pure virtual member function 'f' has undefined behavior; overrides of 'f' in subclasses are not available in the destructor of 'TA'}} + 
virtual void f() = 0; // expected-note 2{{'f' declared here}} +}; + +struct TB : TA { // expected-note {{in instantiation of member function 'TA::TA' requested here}} + void f() override; // expected-note@-1 {{in instantiation of member function 'TA::~TA' requested here}} +}; +TB tb; + +struct TC : TA {}; // ok +TC tc; // ok + +struct TD : TA { + void f() override; +}; +TD td; diff --git a/clang/test/SemaCXX/warn-pure-virtual-kext.cpp b/clang/test/SemaCXX/warn-pure-virtual-kext.cpp index e681a02cc9166..8431e202ad714 100644 --- a/clang/test/SemaCXX/warn-pure-virtual-kext.cpp +++ b/clang/test/SemaCXX/warn-pure-virtual-kext.cpp @@ -6,3 +6,15 @@ struct A { A::f(); // expected-warning {{call to pure virtual member function 'f' has undefined behavior; overrides of 'f' in subclasses are not available in the constructor of 'A'}} // expected-note {{qualified call to 'A'::'f' is treated as a virtual call to 'f' due to -fapple-kext}} } }; + +template struct TA { + virtual void f() = 0; // expected-note {{'f' declared here}} + + TA() { TA::f(); } // expected-warning {{call to pure virtual member function 'f' has undefined behavior; overrides of 'f' in subclasses are not available in the constructor of 'TA'}} // expected-note {{qualified call to 'TA'::'f' is treated as a virtual call to 'f' due to -fapple-kext}} +}; + +struct B : TA { // expected-note {{in instantiation of member function 'TA::TA' requested here}} + void f() override; +}; + +B b; diff --git a/clang/test/SemaObjC/method-return-void.m b/clang/test/SemaObjC/method-return-void.m new file mode 100644 index 0000000000000..850c81bad1fca --- /dev/null +++ b/clang/test/SemaObjC/method-return-void.m @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -Wmethod-signatures -fsyntax-only -verify -Wno-objc-root-class %s + +@interface Test +- (int)foo; +@end + +@implementation Test +- (int)foo { return; } // expected-error {{non-void method 'foo' should return a value}} +@end From 13ae440de4a408cf9d1a448def09769ecbecfdf7 Mon Sep 17 00:00:00 
2001 From: Nikita Popov Date: Sat, 18 Jul 2020 23:36:42 +0200 Subject: [PATCH 735/771] [InstCombine] Add test for PR46680 (NFC) --- llvm/test/Transforms/InstCombine/pr46680.ll | 92 +++++++++++++++++++++ 1 file changed, 92 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/pr46680.ll diff --git a/llvm/test/Transforms/InstCombine/pr46680.ll b/llvm/test/Transforms/InstCombine/pr46680.ll new file mode 100644 index 0000000000000..90ea2e110afe4 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/pr46680.ll @@ -0,0 +1,92 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -S -instcombine -instcombine-infinite-loop-threshold=3 < %s | FileCheck %s + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-pc-linux-gnu" + +@a = dso_local local_unnamed_addr global i64 0, align 8 +@d = dso_local local_unnamed_addr global i64 0, align 8 +@c = external dso_local local_unnamed_addr global i8, align 1 + +define void @test(i16* nocapture readonly %arg) local_unnamed_addr { +; CHECK-LABEL: @test( +; CHECK-NEXT: bb: +; CHECK-NEXT: [[I:%.*]] = load i64, i64* @d, align 8 +; CHECK-NEXT: [[I1:%.*]] = icmp eq i64 [[I]], 0 +; CHECK-NEXT: [[I2:%.*]] = load i64, i64* @a, align 8 +; CHECK-NEXT: [[I3:%.*]] = icmp ne i64 [[I2]], 0 +; CHECK-NEXT: br i1 [[I1]], label [[BB13:%.*]], label [[BB4:%.*]] +; CHECK: bb4: +; CHECK-NEXT: [[I5:%.*]] = load i16, i16* [[ARG:%.*]], align 2 +; CHECK-NEXT: [[I6:%.*]] = trunc i16 [[I5]] to i8 +; CHECK-NEXT: store i8 [[I6]], i8* @c, align 1 +; CHECK-NEXT: tail call void @llvm.assume(i1 [[I3]]) +; CHECK-NEXT: br label [[BB22:%.*]] +; CHECK: bb13: +; CHECK-NEXT: [[I14:%.*]] = load i16, i16* [[ARG]], align 2 +; CHECK-NEXT: [[I15:%.*]] = trunc i16 [[I14]] to i8 +; CHECK-NEXT: store i8 [[I15]], i8* @c, align 1 +; CHECK-NEXT: br label [[BB22]] +; CHECK: bb22: +; CHECK-NEXT: [[STOREMERGE2_IN:%.*]] = load i16, i16* [[ARG]], align 2 +; CHECK-NEXT: 
[[STOREMERGE2:%.*]] = trunc i16 [[STOREMERGE2_IN]] to i8 +; CHECK-NEXT: store i8 [[STOREMERGE2]], i8* @c, align 1 +; CHECK-NEXT: [[STOREMERGE1_IN:%.*]] = load i16, i16* [[ARG]], align 2 +; CHECK-NEXT: [[STOREMERGE1:%.*]] = trunc i16 [[STOREMERGE1_IN]] to i8 +; CHECK-NEXT: store i8 [[STOREMERGE1]], i8* @c, align 1 +; CHECK-NEXT: [[STOREMERGE_IN:%.*]] = load i16, i16* [[ARG]], align 2 +; CHECK-NEXT: [[STOREMERGE:%.*]] = trunc i16 [[STOREMERGE_IN]] to i8 +; CHECK-NEXT: store i8 [[STOREMERGE]], i8* @c, align 1 +; CHECK-NEXT: br label [[BB23:%.*]] +; CHECK: bb23: +; CHECK-NEXT: br label [[BB23]] +; +bb: + %i = load i64, i64* @d, align 8 + %i1 = icmp eq i64 %i, 0 + %i2 = load i64, i64* @a, align 8 + %i3 = icmp ne i64 %i2, 0 + br i1 %i1, label %bb13, label %bb4 + +bb4: ; preds = %bb + %i5 = load i16, i16* %arg, align 2 + %i6 = trunc i16 %i5 to i8 + store i8 %i6, i8* @c, align 1 + tail call void @llvm.assume(i1 %i3) + %i7 = load i16, i16* %arg, align 2 + %i8 = trunc i16 %i7 to i8 + store i8 %i8, i8* @c, align 1 + %i9 = load i16, i16* %arg, align 2 + %i10 = trunc i16 %i9 to i8 + store i8 %i10, i8* @c, align 1 + %i11 = load i16, i16* %arg, align 2 + %i12 = trunc i16 %i11 to i8 + store i8 %i12, i8* @c, align 1 + br label %bb22 + +bb13: ; preds = %bb + %i14 = load i16, i16* %arg, align 2 + %i15 = trunc i16 %i14 to i8 + store i8 %i15, i8* @c, align 1 + %i16 = load i16, i16* %arg, align 2 + %i17 = trunc i16 %i16 to i8 + store i8 %i17, i8* @c, align 1 + %i18 = load i16, i16* %arg, align 2 + %i19 = trunc i16 %i18 to i8 + store i8 %i19, i8* @c, align 1 + %i20 = load i16, i16* %arg, align 2 + %i21 = trunc i16 %i20 to i8 + store i8 %i21, i8* @c, align 1 + br label %bb22 + +bb22: ; preds = %bb13, %bb4 + br label %bb23 + +bb23: ; preds = %bb23, %bb22 + br label %bb23 +} + +; Function Attrs: nounwind willreturn +declare void @llvm.assume(i1) #0 + +attributes #0 = { nounwind willreturn } From 6a25838be6a0b242443320559cf1f97f466c59cc Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 
11 Jul 2020 17:17:21 -0700 Subject: [PATCH 736/771] [gcov][test] Simplify instrprof-dlopen-dlclose-gcov.test --- .../instrprof-dlopen-dlclose-main.c.gcov | 91 ------------------- ...prof-dlopen-dlclose-main_three-libs.c.gcov | 91 ------------------- .../Inputs/instrprof-dlopen-func.c.gcov | 6 -- .../Inputs/instrprof-dlopen-func2.c.gcov | 6 -- .../Inputs/instrprof-dlopen-func3.c.gcov | 6 -- compiler-rt/test/profile/Posix/gcov-dlopen.c | 82 +++++++++++++++++ .../Posix/instrprof-dlopen-dlclose-gcov.test | 33 ------- 7 files changed, 82 insertions(+), 233 deletions(-) delete mode 100644 compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c.gcov delete mode 100644 compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main_three-libs.c.gcov delete mode 100644 compiler-rt/test/profile/Inputs/instrprof-dlopen-func.c.gcov delete mode 100644 compiler-rt/test/profile/Inputs/instrprof-dlopen-func2.c.gcov delete mode 100644 compiler-rt/test/profile/Inputs/instrprof-dlopen-func3.c.gcov create mode 100644 compiler-rt/test/profile/Posix/gcov-dlopen.c delete mode 100644 compiler-rt/test/profile/Posix/instrprof-dlopen-dlclose-gcov.test diff --git a/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c.gcov deleted file mode 100644 index 2d538f63eb46b..0000000000000 --- a/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c.gcov +++ /dev/null @@ -1,91 +0,0 @@ -// CHECK: -: 0:Source:{{.*}}Inputs/instrprof-dlopen-dlclose-main.c -// CHECK-NEXT: -: 0:Graph:instrprof-dlopen-dlclose-main.gcno -// CHECK-NEXT: -: 0:Data:instrprof-dlopen-dlclose-main.gcda -// CHECK-NEXT: -: 0:Runs:1 -// CHECK-NEXT: -: 0:Programs:1 -// CHECK-NEXT: -: 1:#include -// CHECK-NEXT: -: 2:#include -// CHECK-NEXT: -: 3:#include -// CHECK-NEXT: -: 4: -// CHECK-NEXT: 1: 5:int main(int argc, char *argv[]) { -// CHECK-NEXT: 1: 6: dlerror(); -// CHECK-NEXT: 1: 7: void *f1_handle = dlopen("func.shared", RTLD_LAZY | 
RTLD_GLOBAL); -// CHECK-NEXT: 1: 8: if (f1_handle == NULL) { -// CHECK-NEXT: #####: 9: fprintf(stderr, "unable to open 'func.shared': %s\n", dlerror()); -// CHECK-NEXT: #####: 10: return EXIT_FAILURE; -// CHECK-NEXT: -: 11: } -// CHECK-NEXT: -: 12: -// CHECK-NEXT: 1: 13: void (*func)(void) = (void (*)(void))dlsym(f1_handle, "func"); -// CHECK-NEXT: 1: 14: if (func == NULL) { -// CHECK-NEXT: #####: 15: fprintf(stderr, "unable to lookup symbol 'func': %s\n", dlerror()); -// CHECK-NEXT: #####: 16: return EXIT_FAILURE; -// CHECK-NEXT: -: 17: } -// CHECK-NEXT: -: 18: -// CHECK-NEXT: 1: 19: dlerror(); -// CHECK-NEXT: 1: 20: void *f2_handle = dlopen("func2.shared", RTLD_LAZY | RTLD_GLOBAL); -// CHECK-NEXT: 1: 21: if (f2_handle == NULL) { -// CHECK-NEXT: #####: 22: fprintf(stderr, "unable to open 'func2.shared': %s\n", dlerror()); -// CHECK-NEXT: #####: 23: return EXIT_FAILURE; -// CHECK-NEXT: -: 24: } -// CHECK-NEXT: -: 25: -// CHECK-NEXT: 1: 26: void (*func2)(void) = (void (*)(void))dlsym(f2_handle, "func2"); -// CHECK-NEXT: 1: 27: if (func2 == NULL) { -// CHECK-NEXT: #####: 28: fprintf(stderr, "unable to lookup symbol 'func2': %s\n", dlerror()); -// CHECK-NEXT: #####: 29: return EXIT_FAILURE; -// CHECK-NEXT: -: 30: } -// CHECK-NEXT: 1: 31: func2(); -// CHECK-NEXT: -: 32: -// CHECK-NEXT: -: 33:#ifdef USE_LIB3 -// CHECK-NEXT: -: 34: void *f3_handle = dlopen("func3.shared", RTLD_LAZY | RTLD_GLOBAL); -// CHECK-NEXT: -: 35: if (f3_handle == NULL) { -// CHECK-NEXT: -: 36: fprintf(stderr, "unable to open 'func3.shared': %s\n", dlerror()); -// CHECK-NEXT: -: 37: return EXIT_FAILURE; -// CHECK-NEXT: -: 38: } -// CHECK-NEXT: -: 39: -// CHECK-NEXT: -: 40: void (*func3)(void) = (void (*)(void))dlsym(f3_handle, "func3"); -// CHECK-NEXT: -: 41: if (func3 == NULL) { -// CHECK-NEXT: -: 42: fprintf(stderr, "unable to lookup symbol 'func3': %s\n", dlerror()); -// CHECK-NEXT: -: 43: return EXIT_FAILURE; -// CHECK-NEXT: -: 44: } -// CHECK-NEXT: -: 45: func3(); -// CHECK-NEXT: -: 46:#endif 
-// CHECK-NEXT: -: 47: -// CHECK-NEXT: 1: 48: dlerror(); -// CHECK-NEXT: 1: 49: void (*gcov_flush1)() = (void (*)())dlsym(f1_handle, "__gcov_flush"); -// CHECK-NEXT: 1: 50: if (gcov_flush1 == NULL) { -// CHECK-NEXT: #####: 51: fprintf(stderr, "unable to find __gcov_flush in func.shared': %s\n", dlerror()); -// CHECK-NEXT: #####: 52: return EXIT_FAILURE; -// CHECK-NEXT: -: 53: } -// CHECK-NEXT: -: 54: -// CHECK-NEXT: 1: 55: dlerror(); -// CHECK-NEXT: 1: 56: void (*gcov_flush2)() = (void (*)())dlsym(f2_handle, "__gcov_flush"); -// CHECK-NEXT: 1: 57: if (gcov_flush2 == NULL) { -// CHECK-NEXT: #####: 58: fprintf(stderr, "unable to find __gcov_flush in func2.shared': %s\n", dlerror()); -// CHECK-NEXT: #####: 59: return EXIT_FAILURE; -// CHECK-NEXT: -: 60: } -// CHECK-NEXT: -: 61: -// CHECK-NEXT: 1: 62: if (gcov_flush1 == gcov_flush2) { -// CHECK-NEXT: #####: 63: fprintf(stderr, "Same __gcov_flush found in func.shared and func2.shared\n"); -// CHECK-NEXT: #####: 64: return EXIT_FAILURE; -// CHECK-NEXT: -: 65: } -// CHECK-NEXT: -: 66: -// CHECK-NEXT: 1: 67: dlerror(); -// CHECK-NEXT: 1: 68: if (dlclose(f2_handle) != 0) { -// CHECK-NEXT: #####: 69: fprintf(stderr, "unable to close 'func2.shared': %s\n", dlerror()); -// CHECK-NEXT: #####: 70: return EXIT_FAILURE; -// CHECK-NEXT: -: 71: } -// CHECK-NEXT: -: 72: -// CHECK-NEXT: 1: 73: func(); -// CHECK-NEXT: -: 74: -// CHECK-NEXT: 1: 75: int g1 = 0; -// CHECK-NEXT: 1: 76: int g2 = 0; -// CHECK-NEXT: 1: 77: int n = 10; -// CHECK-NEXT: -: 78: -// CHECK-NEXT: 1: 79: if (n % 5 == 0) -// CHECK-NEXT: 1: 80: g1++; -// CHECK-NEXT: -: 81: else -// CHECK-NEXT: #####: 82: g2++; -// CHECK-NEXT: -: 83: -// CHECK-NEXT: 1: 84: return EXIT_SUCCESS; -// CHECK-NEXT: 1: 85:} -// CHECK-NEXT: -: 86: diff --git a/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main_three-libs.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main_three-libs.c.gcov deleted file mode 100644 index f1dd1757144f4..0000000000000 --- 
a/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main_three-libs.c.gcov +++ /dev/null @@ -1,91 +0,0 @@ -// CHECK: -: 0:Source:{{.*}}Inputs/instrprof-dlopen-dlclose-main.c -// CHECK-NEXT: -: 0:Graph:instrprof-dlopen-dlclose-main.gcno -// CHECK-NEXT: -: 0:Data:instrprof-dlopen-dlclose-main.gcda -// CHECK-NEXT: -: 0:Runs:1 -// CHECK-NEXT: -: 0:Programs:1 -// CHECK-NEXT: -: 1:#include -// CHECK-NEXT: -: 2:#include -// CHECK-NEXT: -: 3:#include -// CHECK-NEXT: -: 4: -// CHECK-NEXT: 1: 5:int main(int argc, char *argv[]) { -// CHECK-NEXT: 1: 6: dlerror(); -// CHECK-NEXT: 1: 7: void *f1_handle = dlopen("func.shared", RTLD_LAZY | RTLD_GLOBAL); -// CHECK-NEXT: 1: 8: if (f1_handle == NULL) { -// CHECK-NEXT: #####: 9: fprintf(stderr, "unable to open 'func.shared': %s\n", dlerror()); -// CHECK-NEXT: #####: 10: return EXIT_FAILURE; -// CHECK-NEXT: -: 11: } -// CHECK-NEXT: -: 12: -// CHECK-NEXT: 1: 13: void (*func)(void) = (void (*)(void))dlsym(f1_handle, "func"); -// CHECK-NEXT: 1: 14: if (func == NULL) { -// CHECK-NEXT: #####: 15: fprintf(stderr, "unable to lookup symbol 'func': %s\n", dlerror()); -// CHECK-NEXT: #####: 16: return EXIT_FAILURE; -// CHECK-NEXT: -: 17: } -// CHECK-NEXT: -: 18: -// CHECK-NEXT: 1: 19: dlerror(); -// CHECK-NEXT: 1: 20: void *f2_handle = dlopen("func2.shared", RTLD_LAZY | RTLD_GLOBAL); -// CHECK-NEXT: 1: 21: if (f2_handle == NULL) { -// CHECK-NEXT: #####: 22: fprintf(stderr, "unable to open 'func2.shared': %s\n", dlerror()); -// CHECK-NEXT: #####: 23: return EXIT_FAILURE; -// CHECK-NEXT: -: 24: } -// CHECK-NEXT: -: 25: -// CHECK-NEXT: 1: 26: void (*func2)(void) = (void (*)(void))dlsym(f2_handle, "func2"); -// CHECK-NEXT: 1: 27: if (func2 == NULL) { -// CHECK-NEXT: #####: 28: fprintf(stderr, "unable to lookup symbol 'func2': %s\n", dlerror()); -// CHECK-NEXT: #####: 29: return EXIT_FAILURE; -// CHECK-NEXT: -: 30: } -// CHECK-NEXT: 1: 31: func2(); -// CHECK-NEXT: -: 32: -// CHECK-NEXT: -: 33:#ifdef USE_LIB3 -// CHECK-NEXT: 1: 34: void 
*f3_handle = dlopen("func3.shared", RTLD_LAZY | RTLD_GLOBAL); -// CHECK-NEXT: 1: 35: if (f3_handle == NULL) { -// CHECK-NEXT: #####: 36: fprintf(stderr, "unable to open 'func3.shared': %s\n", dlerror()); -// CHECK-NEXT: #####: 37: return EXIT_FAILURE; -// CHECK-NEXT: -: 38: } -// CHECK-NEXT: -: 39: -// CHECK-NEXT: 1: 40: void (*func3)(void) = (void (*)(void))dlsym(f3_handle, "func3"); -// CHECK-NEXT: 1: 41: if (func3 == NULL) { -// CHECK-NEXT: #####: 42: fprintf(stderr, "unable to lookup symbol 'func3': %s\n", dlerror()); -// CHECK-NEXT: #####: 43: return EXIT_FAILURE; -// CHECK-NEXT: -: 44: } -// CHECK-NEXT: 1: 45: func3(); -// CHECK-NEXT: -: 46:#endif -// CHECK-NEXT: -: 47: -// CHECK-NEXT: 1: 48: dlerror(); -// CHECK-NEXT: 1: 49: void (*gcov_flush1)() = (void (*)())dlsym(f1_handle, "__gcov_flush"); -// CHECK-NEXT: 1: 50: if (gcov_flush1 == NULL) { -// CHECK-NEXT: #####: 51: fprintf(stderr, "unable to find __gcov_flush in func.shared': %s\n", dlerror()); -// CHECK-NEXT: #####: 52: return EXIT_FAILURE; -// CHECK-NEXT: -: 53: } -// CHECK-NEXT: -: 54: -// CHECK-NEXT: 1: 55: dlerror(); -// CHECK-NEXT: 1: 56: void (*gcov_flush2)() = (void (*)())dlsym(f2_handle, "__gcov_flush"); -// CHECK-NEXT: 1: 57: if (gcov_flush2 == NULL) { -// CHECK-NEXT: #####: 58: fprintf(stderr, "unable to find __gcov_flush in func2.shared': %s\n", dlerror()); -// CHECK-NEXT: #####: 59: return EXIT_FAILURE; -// CHECK-NEXT: -: 60: } -// CHECK-NEXT: -: 61: -// CHECK-NEXT: 1: 62: if (gcov_flush1 == gcov_flush2) { -// CHECK-NEXT: #####: 63: fprintf(stderr, "Same __gcov_flush found in func.shared and func2.shared\n"); -// CHECK-NEXT: #####: 64: return EXIT_FAILURE; -// CHECK-NEXT: -: 65: } -// CHECK-NEXT: -: 66: -// CHECK-NEXT: 1: 67: dlerror(); -// CHECK-NEXT: 1: 68: if (dlclose(f2_handle) != 0) { -// CHECK-NEXT: #####: 69: fprintf(stderr, "unable to close 'func2.shared': %s\n", dlerror()); -// CHECK-NEXT: #####: 70: return EXIT_FAILURE; -// CHECK-NEXT: -: 71: } -// CHECK-NEXT: -: 72: -// 
CHECK-NEXT: 1: 73: func(); -// CHECK-NEXT: -: 74: -// CHECK-NEXT: 1: 75: int g1 = 0; -// CHECK-NEXT: 1: 76: int g2 = 0; -// CHECK-NEXT: 1: 77: int n = 10; -// CHECK-NEXT: -: 78: -// CHECK-NEXT: 1: 79: if (n % 5 == 0) -// CHECK-NEXT: 1: 80: g1++; -// CHECK-NEXT: -: 81: else -// CHECK-NEXT: #####: 82: g2++; -// CHECK-NEXT: -: 83: -// CHECK-NEXT: 1: 84: return EXIT_SUCCESS; -// CHECK-NEXT: 1: 85:} -// CHECK-NEXT: -: 86: diff --git a/compiler-rt/test/profile/Inputs/instrprof-dlopen-func.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-dlopen-func.c.gcov deleted file mode 100644 index 3af4ec94a1b01..0000000000000 --- a/compiler-rt/test/profile/Inputs/instrprof-dlopen-func.c.gcov +++ /dev/null @@ -1,6 +0,0 @@ -// CHECK: -: 0:Source:{{.*}}Inputs/instrprof-dlopen-func.c -// CHECK-NEXT: -: 0:Graph:instrprof-dlopen-func.gcno -// CHECK-NEXT: -: 0:Data:instrprof-dlopen-func.gcda -// CHECK-NEXT: -: 0:Runs:1 -// CHECK-NEXT: -: 0:Programs:1 -// CHECK-NEXT: 1: 1:void func(int K) {} diff --git a/compiler-rt/test/profile/Inputs/instrprof-dlopen-func2.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-dlopen-func2.c.gcov deleted file mode 100644 index 7101f74b938d6..0000000000000 --- a/compiler-rt/test/profile/Inputs/instrprof-dlopen-func2.c.gcov +++ /dev/null @@ -1,6 +0,0 @@ -// CHECK: -: 0:Source:{{.*}}Inputs/instrprof-dlopen-func2.c -// CHECK-NEXT: -: 0:Graph:instrprof-dlopen-func2.gcno -// CHECK-NEXT: -: 0:Data:instrprof-dlopen-func2.gcda -// CHECK-NEXT: -: 0:Runs:1 -// CHECK-NEXT: -: 0:Programs:1 -// CHECK-NEXT: 1: 1:void func2(int K) {} diff --git a/compiler-rt/test/profile/Inputs/instrprof-dlopen-func3.c.gcov b/compiler-rt/test/profile/Inputs/instrprof-dlopen-func3.c.gcov deleted file mode 100644 index 7101f74b938d6..0000000000000 --- a/compiler-rt/test/profile/Inputs/instrprof-dlopen-func3.c.gcov +++ /dev/null @@ -1,6 +0,0 @@ -// CHECK: -: 0:Source:{{.*}}Inputs/instrprof-dlopen-func2.c -// CHECK-NEXT: -: 0:Graph:instrprof-dlopen-func2.gcno -// CHECK-NEXT: -: 
0:Data:instrprof-dlopen-func2.gcda -// CHECK-NEXT: -: 0:Runs:1 -// CHECK-NEXT: -: 0:Programs:1 -// CHECK-NEXT: 1: 1:void func2(int K) {} diff --git a/compiler-rt/test/profile/Posix/gcov-dlopen.c b/compiler-rt/test/profile/Posix/gcov-dlopen.c new file mode 100644 index 0000000000000..f09129d3b7c46 --- /dev/null +++ b/compiler-rt/test/profile/Posix/gcov-dlopen.c @@ -0,0 +1,82 @@ +/// atexit(3) not supported in dlopen(3)ed+dlclose(3)d DSO +// XFAIL: netbsd + +// RUN: mkdir -p %t.d && cd %t.d + +// RUN: echo 'void func1(int k) {}' > func1.c +// RUN: echo 'void func2(int k) {}' > func2.c +// RUN: echo 'void func3(int k) {}' > func3.c +// RUN: %clang --coverage -fPIC -shared func1.c -o func1.so +// RUN: %clang --coverage -fPIC -shared func2.c -o func2.so +// RUN: %clang --coverage -fPIC -shared func3.c -o func3.so +// RUN: %clang --coverage -fPIC -rpath %t.d %s -o %t + +/// Test with two dlopened libraries. +// RUN: rm -f gcov-dlopen.gcda func1.gcda func2.gcda +// RUN: %run %t +// RUN: llvm-cov gcov -t gcov-dlopen.gcda | FileCheck %s +// RUN: llvm-cov gcov -t func1.gcda | FileCheck %s --check-prefix=FUNC1 +// RUN: llvm-cov gcov -t func2.gcda | FileCheck %s --check-prefix=FUNC2 + +// FUNC1: 1: 1:void func1(int k) {} +// FUNC2: 1: 1:void func2(int k) {} + +/// Test with three dlopened libraries. 
+// RUN: %clang -DUSE_LIB3 --coverage -fPIC -rpath %t.d %s -o %t +// RUN: rm -f gcov-dlopen.gcda func1.gcda func2.gcda func3.gcda +// RUN: %run %t +// RUN: llvm-cov gcov -t gcov-dlopen.gcda | FileCheck %s --check-prefix=LIB3 +// RUN: llvm-cov gcov -t func1.gcda | FileCheck %s --check-prefix=FUNC1 +// RUN: llvm-cov gcov -t func2.gcda | FileCheck %s --check-prefix=FUNC2 +// RUN: llvm-cov gcov -t func3.gcda | FileCheck %s --check-prefix=FUNC3 + +// FUNC3: 1: 1:void func3(int k) {} + +#include +#include +#include + +int main(int argc, char *argv[]) { + void *f1_handle = dlopen("func1.so", RTLD_LAZY | RTLD_GLOBAL); + if (f1_handle == NULL) + return fprintf(stderr, "unable to open 'func1.so': %s\n", dlerror()); + void (*func1)(void) = (void (*)(void))dlsym(f1_handle, "func1"); + if (func1 == NULL) + return fprintf(stderr, "unable to lookup symbol 'func1': %s\n", dlerror()); + + void *f2_handle = dlopen("func2.so", RTLD_LAZY | RTLD_GLOBAL); + if (f2_handle == NULL) + return fprintf(stderr, "unable to open 'func2.so': %s\n", dlerror()); + void (*func2)(void) = (void (*)(void))dlsym(f2_handle, "func2"); + if (func2 == NULL) + return fprintf(stderr, "unable to lookup symbol 'func2': %s\n", dlerror()); + func2(); + +#ifdef USE_LIB3 +// CHECK: -: [[#@LINE+2]]: void *f3_handle +// LIB3: 1: [[#@LINE+1]]: void *f3_handle + void *f3_handle = dlopen("func3.so", RTLD_LAZY | RTLD_GLOBAL); + if (f3_handle == NULL) + return fprintf(stderr, "unable to open 'func3.so': %s\n", dlerror()); + void (*func3)(void) = (void (*)(void))dlsym(f3_handle, "func3"); + if (func3 == NULL) + return fprintf(stderr, "unable to lookup symbol 'func3': %s\n", dlerror()); + func3(); +#endif + + void (*gcov_flush1)() = (void (*)())dlsym(f1_handle, "__gcov_flush"); + if (gcov_flush1 == NULL) + return fprintf(stderr, "unable to find __gcov_flush in func1.so': %s\n", dlerror()); + void (*gcov_flush2)() = (void (*)())dlsym(f2_handle, "__gcov_flush"); + if (gcov_flush2 == NULL) + return fprintf(stderr, "unable to 
find __gcov_flush in func2.so': %s\n", dlerror()); + if (gcov_flush1 == gcov_flush2) + return fprintf(stderr, "same __gcov_flush found in func1.so and func2.so\n"); + + if (dlclose(f2_handle) != 0) + return fprintf(stderr, "unable to close 'func2.so': %s\n", dlerror()); + + func1(); + + return 0; +} diff --git a/compiler-rt/test/profile/Posix/instrprof-dlopen-dlclose-gcov.test b/compiler-rt/test/profile/Posix/instrprof-dlopen-dlclose-gcov.test deleted file mode 100644 index b845303a8afdc..0000000000000 --- a/compiler-rt/test/profile/Posix/instrprof-dlopen-dlclose-gcov.test +++ /dev/null @@ -1,33 +0,0 @@ -# atexit(3) not supported in dlopen(3)ed+dlclose(3)d DSO -XFAIL: netbsd - -RUN: mkdir -p %t.d -RUN: cd %t.d - -RUN: %clang --coverage -o func.shared -fPIC -shared %S/../Inputs/instrprof-dlopen-func.c -RUN: %clang --coverage -o func2.shared -fPIC -shared %S/../Inputs/instrprof-dlopen-func2.c -RUN: %clang --coverage -o func3.shared -fPIC -shared %S/../Inputs/instrprof-dlopen-func3.c -RUN: %clang --coverage -o %t -fPIC -rpath %t.d %S/../Inputs/instrprof-dlopen-dlclose-main.c - -# Test with two dlopened libraries. -RUN: rm -f instrprof-dlopen-dlclose-main.gcda instrprof-dlopen-func.gcda instrprof-dlopen-func2.gcda -RUN: %run %t -RUN: llvm-cov gcov instrprof-dlopen-dlclose-main.gcda -RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-dlclose-main.c.gcov %S/../Inputs/instrprof-dlopen-dlclose-main.c.gcov -RUN: llvm-cov gcov instrprof-dlopen-func.gcda -RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-func.c.gcov %S/../Inputs/instrprof-dlopen-func.c.gcov -RUN: llvm-cov gcov instrprof-dlopen-func2.gcda -RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-func2.c.gcov %S/../Inputs/instrprof-dlopen-func2.c.gcov - -# Test with three dlopened libraries. 
-RUN: %clang -DUSE_LIB3 --coverage -o %t -fPIC -rpath %t.d %S/../Inputs/instrprof-dlopen-dlclose-main.c -RUN: rm -f instrprof-dlopen-dlclose-main.gcda instrprof-dlopen-func.gcda instrprof-dlopen-func2.gcda instrprof-dlopen-func3.gcda -RUN: %run %t -RUN: llvm-cov gcov instrprof-dlopen-dlclose-main.gcda -RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-dlclose-main.c.gcov %S/../Inputs/instrprof-dlopen-dlclose-main_three-libs.c.gcov -RUN: llvm-cov gcov instrprof-dlopen-func.gcda -RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-func.c.gcov %S/../Inputs/instrprof-dlopen-func.c.gcov -RUN: llvm-cov gcov instrprof-dlopen-func2.gcda -RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-func2.c.gcov %S/../Inputs/instrprof-dlopen-func2.c.gcov -RUN: llvm-cov gcov instrprof-dlopen-func3.gcda -RUN: FileCheck --match-full-lines --strict-whitespace --input-file instrprof-dlopen-func2.c.gcov %S/../Inputs/instrprof-dlopen-func3.c.gcov From 5809a32e7c2d79a9a463eb9c15cde994b42e3002 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 18 Jul 2020 15:07:46 -0700 Subject: [PATCH 737/771] [gcov] Add __gcov_dump/__gcov_reset and delete __gcov_flush GCC r187297 (2012-05) introduced `__gcov_dump` and `__gcov_reset`. `__gcov_flush = __gcov_dump + __gcov_reset` The resolution to https://gcc.gnu.org/PR93623 ("No need to dump gcdas when forking" target GCC 11.0) removed the unuseful and undocumented __gcov_flush. Close PR38064. 
Reviewed By: calixte, serge-sans-paille Differential Revision: https://reviews.llvm.org/D83149 --- clang/lib/Driver/ToolChains/Darwin.cpp | 3 +- clang/test/CodeGen/code-coverage.c | 1 - clang/test/Driver/darwin-ld.c | 3 +- compiler-rt/lib/profile/GCDAProfiling.c | 35 +++++----------- .../Inputs/instrprof-dlopen-dlclose-main.c | 16 +++---- compiler-rt/test/profile/Posix/gcov-dlopen.c | 21 ++++++---- .../test/profile/Posix/gcov-shared-flush.c | 23 ++++++---- .../profile/gcov-__gcov_flush-terminate.c | 6 ++- .../test/profile/gcov-dump-and-remove.c | 13 +++--- .../Instrumentation/GCOVProfiling.cpp | 42 ++----------------- 10 files changed, 65 insertions(+), 98 deletions(-) diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index 7b879f8cb6521..f910c88fa9674 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -1196,7 +1196,8 @@ void Darwin::addProfileRTLibs(const ArgList &Args, // runtime's functionality. if (hasExportSymbolDirective(Args)) { if (ForGCOV) { - addExportedSymbol(CmdArgs, "___gcov_flush"); + addExportedSymbol(CmdArgs, "___gcov_dump"); + addExportedSymbol(CmdArgs, "___gcov_reset"); addExportedSymbol(CmdArgs, "_flush_fn_list"); addExportedSymbol(CmdArgs, "_writeout_fn_list"); addExportedSymbol(CmdArgs, "_reset_fn_list"); diff --git a/clang/test/CodeGen/code-coverage.c b/clang/test/CodeGen/code-coverage.c index 34ba3554f5b54..5a663135e2f03 100644 --- a/clang/test/CodeGen/code-coverage.c +++ b/clang/test/CodeGen/code-coverage.c @@ -51,7 +51,6 @@ int test2(int b) { // Check that the noredzone flag is set on the generated functions. 
// CHECK: void @__llvm_gcov_writeout() unnamed_addr [[NRZ:#[0-9]+]] -// CHECK: void @__llvm_gcov_flush() unnamed_addr [[NRZ]] // CHECK: void @__llvm_gcov_init() unnamed_addr [[NRZ]] // CHECK: attributes [[NRZ]] = { {{.*}}noredzone{{.*}} } diff --git a/clang/test/Driver/darwin-ld.c b/clang/test/Driver/darwin-ld.c index 3fc0556a2bde9..ea71142e88c19 100644 --- a/clang/test/Driver/darwin-ld.c +++ b/clang/test/Driver/darwin-ld.c @@ -351,7 +351,8 @@ // RUN: FileCheck -check-prefix=GCOV_EXPORT %s < %t.log // RUN: %clang -target x86_64-apple-darwin12 -fprofile-arcs -Xlinker -exported_symbols_list -Xlinker /dev/null -### %t.o 2> %t.log // RUN: FileCheck -check-prefix=GCOV_EXPORT %s < %t.log -// GCOV_EXPORT: "-exported_symbol" "___gcov_flush" +// GCOV_EXPORT: "-exported_symbol" "___gcov_dump" +// GCOV_EXPORT: "-exported_symbol" "___gcov_reset" // // Check that we can pass the outliner down to the linker. // RUN: env IPHONEOS_DEPLOYMENT_TARGET=7.0 \ diff --git a/compiler-rt/lib/profile/GCDAProfiling.c b/compiler-rt/lib/profile/GCDAProfiling.c index 57d8dec423cc0..fa4b951383324 100644 --- a/compiler-rt/lib/profile/GCDAProfiling.c +++ b/compiler-rt/lib/profile/GCDAProfiling.c @@ -639,25 +639,6 @@ static void llvm_writeout_and_clear(void) { fn_list_remove(&writeout_fn_list); } -COMPILER_RT_VISIBILITY -void llvm_register_flush_function(fn_ptr fn) { - fn_list_insert(&flush_fn_list, fn); -} - -void __gcov_flush() { - struct fn_node* curr = flush_fn_list.head; - - while (curr) { - curr->fn(); - curr = curr->next; - } -} - -COMPILER_RT_VISIBILITY -void llvm_delete_flush_function_list(void) { - fn_list_remove(&flush_fn_list); -} - COMPILER_RT_VISIBILITY void llvm_register_reset_function(fn_ptr fn) { fn_list_insert(&reset_fn_list, fn); @@ -698,15 +679,12 @@ pid_t __gcov_fork() { #endif COMPILER_RT_VISIBILITY -void llvm_gcov_init(fn_ptr wfn, fn_ptr ffn, fn_ptr rfn) { +void llvm_gcov_init(fn_ptr wfn, fn_ptr rfn) { static int atexit_ran = 0; if (wfn) llvm_register_writeout_function(wfn); 
- if (ffn) - llvm_register_flush_function(ffn); - if (rfn) llvm_register_reset_function(rfn); @@ -715,11 +693,20 @@ void llvm_gcov_init(fn_ptr wfn, fn_ptr ffn, fn_ptr rfn) { /* Make sure we write out the data and delete the data structures. */ atexit(llvm_delete_reset_function_list); - atexit(llvm_delete_flush_function_list); #ifdef _WIN32 atexit(llvm_writeout_and_clear); #endif } } +void __gcov_dump(void) { + for (struct fn_node *f = writeout_fn_list.head; f; f = f->next) + f->fn(); +} + +void __gcov_reset(void) { + for (struct fn_node *f = reset_fn_list.head; f; f = f->next) + f->fn(); +} + #endif diff --git a/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c b/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c index 3f4a4f6cc6a63..416b90384c7d2 100644 --- a/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c +++ b/compiler-rt/test/profile/Inputs/instrprof-dlopen-dlclose-main.c @@ -46,21 +46,21 @@ int main(int argc, char *argv[]) { #endif dlerror(); - void (*gcov_flush1)() = (void (*)())dlsym(f1_handle, "__gcov_flush"); - if (gcov_flush1 == NULL) { - fprintf(stderr, "unable to find __gcov_flush in func.shared': %s\n", dlerror()); + void (*gcov_reset1)() = (void (*)())dlsym(f1_handle, "__gcov_reset"); + if (gcov_reset1 == NULL) { + fprintf(stderr, "unable to find __gcov_reset in func.shared': %s\n", dlerror()); return EXIT_FAILURE; } dlerror(); - void (*gcov_flush2)() = (void (*)())dlsym(f2_handle, "__gcov_flush"); - if (gcov_flush2 == NULL) { - fprintf(stderr, "unable to find __gcov_flush in func2.shared': %s\n", dlerror()); + void (*gcov_reset2)() = (void (*)())dlsym(f2_handle, "__gcov_reset"); + if (gcov_reset2 == NULL) { + fprintf(stderr, "unable to find __gcov_reset in func2.shared': %s\n", dlerror()); return EXIT_FAILURE; } - if (gcov_flush1 == gcov_flush2) { - fprintf(stderr, "Same __gcov_flush found in func.shared and func2.shared\n"); + if (gcov_reset1 == gcov_reset2) { + fprintf(stderr, "Same __gcov_reset found in 
func.shared and func2.shared\n"); return EXIT_FAILURE; } diff --git a/compiler-rt/test/profile/Posix/gcov-dlopen.c b/compiler-rt/test/profile/Posix/gcov-dlopen.c index f09129d3b7c46..0212979e87ffd 100644 --- a/compiler-rt/test/profile/Posix/gcov-dlopen.c +++ b/compiler-rt/test/profile/Posix/gcov-dlopen.c @@ -64,14 +64,19 @@ int main(int argc, char *argv[]) { func3(); #endif - void (*gcov_flush1)() = (void (*)())dlsym(f1_handle, "__gcov_flush"); - if (gcov_flush1 == NULL) - return fprintf(stderr, "unable to find __gcov_flush in func1.so': %s\n", dlerror()); - void (*gcov_flush2)() = (void (*)())dlsym(f2_handle, "__gcov_flush"); - if (gcov_flush2 == NULL) - return fprintf(stderr, "unable to find __gcov_flush in func2.so': %s\n", dlerror()); - if (gcov_flush1 == gcov_flush2) - return fprintf(stderr, "same __gcov_flush found in func1.so and func2.so\n"); + void (*gcov_reset1)() = (void (*)())dlsym(f1_handle, "__gcov_reset"); + if (gcov_reset1 == NULL) + return fprintf(stderr, "unable to find __gcov_reset in func1.so': %s\n", dlerror()); + void (*gcov_reset2)() = (void (*)())dlsym(f2_handle, "__gcov_reset"); + if (gcov_reset2 == NULL) + return fprintf(stderr, "unable to find __gcov_reset in func2.so': %s\n", dlerror()); + if (gcov_reset1 == gcov_reset2) + return fprintf(stderr, "same __gcov_reset found in func1.so and func2.so\n"); + + /// Test that __gcov_dump is in the dynamic symbol table. 
+ void (*gcov_dump1)() = (void (*)())dlsym(f1_handle, "__gcov_dump"); + if (gcov_dump1 == NULL) + return fprintf(stderr, "unable to find __gcov_dump in func1.so': %s\n", dlerror()); if (dlclose(f2_handle) != 0) return fprintf(stderr, "unable to close 'func2.so': %s\n", dlerror()); diff --git a/compiler-rt/test/profile/Posix/gcov-shared-flush.c b/compiler-rt/test/profile/Posix/gcov-shared-flush.c index 97d44ad5204e1..494fb9be761db 100644 --- a/compiler-rt/test/profile/Posix/gcov-shared-flush.c +++ b/compiler-rt/test/profile/Posix/gcov-shared-flush.c @@ -7,7 +7,7 @@ // RUN: %clang --coverage -fPIC -shared shared.c -o libfunc.so // RUN: test -f shared.gcno -/// Test the case where we exit abruptly after calling __gcov_flush, which means we don't write out the counters at exit. +/// Test the case where we exit abruptly after calling __gcov_dump, which means we don't write out the counters at exit. // RUN: %clang -DEXIT_ABRUPTLY -DSHARED_CALL_BEFORE_FLUSH -DSHARED_CALL_AFTER_FLUSH --coverage %s -L%t.d -rpath %t.d -lfunc -o %t // RUN: test -f gcov-shared-flush.gcno @@ -21,7 +21,7 @@ // SHARED: 1: {{[[0-9]+}}:void foo(int n) -/// Test the case where we exit normally and we have a call to the shared library function before __gcov_flush. +/// Test the case where we exit normally and we have a call to the shared library function before __gcov_dump. 
// RUN: %clang -DSHARED_CALL_BEFORE_FLUSH --coverage %s -L%t.d -rpath %t.d -lfunc -o %t // RUN: test -f gcov-shared-flush.gcno @@ -32,14 +32,15 @@ // BEFORE: -: {{[0-9]+}}:#ifdef SHARED_CALL_BEFORE_FLUSH // BEFORE-NEXT: 1: {{[0-9]+}}: foo(1); -// BEFORE: 1: {{[0-9]+}}: __gcov_flush(); +// BEFORE: 1: {{[0-9]+}}: __gcov_dump(); +// BEFORE-NEXT: 1: {{[0-9]+}}: __gcov_reset(); // BEFORE: -: {{[0-9]+}}:#ifdef SHARED_CALL_AFTER_FLUSH // BEFORE-NEXT: -: {{[0-9]+}}: foo(1); // BEFORE: 1: {{[0-9]+}}: bar(5); // SHARED_ONCE: 1: {{[0-9]+}}:void foo(int n) -// # Test the case where we exit normally and we have a call to the shared library function after __gcov_flush. +// # Test the case where we exit normally and we have a call to the shared library function after __gcov_dump. // RUN: %clang -DSHARED_CALL_AFTER_FLUSH --coverage %s -L%t.d -rpath %t.d -lfunc -o %t // RUN: test -f gcov-shared-flush.gcno @@ -50,12 +51,13 @@ // AFTER: -: {{[0-9]+}}:#ifdef SHARED_CALL_BEFORE_FLUSH // AFTER-NEXT: -: {{[0-9]+}}: foo(1); -// AFTER: 1: {{[0-9]+}}: __gcov_flush(); +// AFTER: 1: {{[0-9]+}}: __gcov_dump(); +// AFTER-NEXT: 1: {{[0-9]+}}: __gcov_reset(); // AFTER: -: {{[0-9]+}}:#ifdef SHARED_CALL_AFTER_FLUSH // AFTER-NEXT: 1: {{[0-9]+}}: foo(1); // AFTER: 1: {{[0-9]+}}: bar(5); -// # Test the case where we exit normally and we have calls to the shared library function before and after __gcov_flush. +// # Test the case where we exit normally and we have calls to the shared library function before and after __gcov_dump. 
// RUN: %clang -DSHARED_CALL_BEFORE_FLUSH -DSHARED_CALL_AFTER_FLUSH --coverage %s -L%t.d -rpath %t.d -lfunc -o %t // RUN: test -f gcov-shared-flush.gcno @@ -66,7 +68,8 @@ // BEFORE_AFTER: -: {{[0-9]+}}:#ifdef SHARED_CALL_BEFORE_FLUSH // BEFORE_AFTER-NEXT: 1: {{[0-9]+}}: foo(1); -// BEFORE_AFTER: 1: {{[0-9]+}}: __gcov_flush(); +// BEFORE_AFTER: 1: {{[0-9]+}}: __gcov_dump(); +// BEFORE_AFTER-NEXT: 1: {{[0-9]+}}: __gcov_reset(); // BEFORE_AFTER: -: {{[0-9]+}}:#ifdef SHARED_CALL_AFTER_FLUSH // BEFORE_AFTER-NEXT: 1: {{[0-9]+}}: foo(1); // BEFORE_AFTER: 1: {{[0-9]+}}: bar(5); @@ -78,7 +81,8 @@ void foo(int n) { } #else extern void foo(int n); -extern void __gcov_flush(void); +extern void __gcov_dump(void); +extern void __gcov_reset(void); int bar1 = 0; int bar2 = 1; @@ -96,7 +100,8 @@ int main(int argc, char *argv[]) { #endif bar(5); - __gcov_flush(); + __gcov_dump(); + __gcov_reset(); bar(5); #ifdef SHARED_CALL_AFTER_FLUSH diff --git a/compiler-rt/test/profile/gcov-__gcov_flush-terminate.c b/compiler-rt/test/profile/gcov-__gcov_flush-terminate.c index 5303e045063e8..649538dc2aaf1 100644 --- a/compiler-rt/test/profile/gcov-__gcov_flush-terminate.c +++ b/compiler-rt/test/profile/gcov-__gcov_flush-terminate.c @@ -10,11 +10,13 @@ // CHECK: -: 0:Runs:1 // CHECK-NEXT: -: 0:Programs:1 -void __gcov_flush(void); +void __gcov_dump(void); +void __gcov_reset(void); int main(void) { // CHECK: 1: [[#@LINE]]:int main(void) int i = 22; // CHECK-NEXT: 1: [[#@LINE]]: - __gcov_flush(); // CHECK-NEXT: 1: [[#@LINE]]: + __gcov_dump(); // CHECK-NEXT: 1: [[#@LINE]]: + __gcov_reset(); // CHECK-NEXT: 1: [[#@LINE]]: i = 42; // CHECK-NEXT: 1: [[#@LINE]]: __builtin_trap(); // CHECK-NEXT: 1: [[#@LINE]]: i = 84; // CHECK-NEXT: 1: [[#@LINE]]: diff --git a/compiler-rt/test/profile/gcov-dump-and-remove.c b/compiler-rt/test/profile/gcov-dump-and-remove.c index 1dcf7b5bd5ca8..b7f80535aada3 100644 --- a/compiler-rt/test/profile/gcov-dump-and-remove.c +++ b/compiler-rt/test/profile/gcov-dump-and-remove.c @@ 
-8,16 +8,19 @@ // RUN: rm -f gcov-dump-and-remove.gcda && %run %t // RUN: llvm-cov gcov -t gcov-dump-and-remove.gcda | FileCheck %s -extern void __gcov_flush(void); +extern void __gcov_dump(void); +extern void __gcov_reset(void); extern int remove(const char *); // CHECK: -: [[#@LINE]]:extern int remove int main(void) { // CHECK-NEXT: #####: [[#@LINE]]: - __gcov_flush(); // CHECK-NEXT: #####: [[#@LINE]]: + __gcov_dump(); // CHECK-NEXT: #####: [[#@LINE]]: + __gcov_reset(); // CHECK-NEXT: #####: [[#@LINE]]: if (remove("gcov-dump-and-remove.gcda") != 0) // CHECK-NEXT: #####: [[#@LINE]]: return 1; // CHECK-NEXT: #####: [[#@LINE]]: return 1; // CHECK-NEXT: -: [[#@LINE]]: - __gcov_flush(); // CHECK-NEXT: #####: [[#@LINE]]: - __gcov_flush(); // CHECK-NEXT: #####: [[#@LINE]]: - if (remove("gcov-dump-and-remove.gcda") != 0) // CHECK-NEXT: #####: [[#@LINE]]: + __gcov_dump(); // CHECK-NEXT: 1: [[#@LINE]]: + __gcov_reset(); // CHECK-NEXT: 1: [[#@LINE]]: + __gcov_dump(); // CHECK-NEXT: 1: [[#@LINE]]: + if (remove("gcov-dump-and-remove.gcda") != 0) // CHECK-NEXT: 1: [[#@LINE]]: return 1; // CHECK-NEXT: #####: [[#@LINE]]: return 1; return 0; diff --git a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp index d8a965a90127b..cd2ea8d5e4edd 100644 --- a/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp +++ b/llvm/lib/Transforms/Instrumentation/GCOVProfiling.cpp @@ -130,7 +130,6 @@ class GCOVProfiler { Function * insertCounterWriteout(ArrayRef>); Function *insertReset(ArrayRef>); - Function *insertFlush(Function *ResetF); bool AddFlushBeforeForkAndExec(); @@ -909,7 +908,6 @@ bool GCOVProfiler::emitProfileArcs() { Function *WriteoutF = insertCounterWriteout(CountersBySP); Function *ResetF = insertReset(CountersBySP); - Function *FlushF = insertFlush(ResetF); // Create a small bit of code that registers the "__llvm_gcov_writeout" to // be executed at exit and the "__llvm_gcov_flush" function to be executed @@ -927,14 
+925,13 @@ bool GCOVProfiler::emitProfileArcs() { IRBuilder<> Builder(BB); FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Type *Params[] = {PointerType::get(FTy, 0), PointerType::get(FTy, 0), - PointerType::get(FTy, 0)}; - FTy = FunctionType::get(Builder.getVoidTy(), Params, false); + auto *PFTy = PointerType::get(FTy, 0); + FTy = FunctionType::get(Builder.getVoidTy(), {PFTy, PFTy}, false); // Initialize the environment and register the local writeout, flush and // reset functions. FunctionCallee GCOVInit = M->getOrInsertFunction("llvm_gcov_init", FTy); - Builder.CreateCall(GCOVInit, {WriteoutF, FlushF, ResetF}); + Builder.CreateCall(GCOVInit, {WriteoutF, ResetF}); Builder.CreateRetVoid(); appendToGlobalCtors(*M, F, 0); @@ -1266,36 +1263,3 @@ Function *GCOVProfiler::insertReset( return ResetF; } - -Function *GCOVProfiler::insertFlush(Function *ResetF) { - FunctionType *FTy = FunctionType::get(Type::getVoidTy(*Ctx), false); - Function *FlushF = M->getFunction("__llvm_gcov_flush"); - if (!FlushF) - FlushF = Function::Create(FTy, GlobalValue::InternalLinkage, - "__llvm_gcov_flush", M); - FlushF->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); - FlushF->addFnAttr(Attribute::NoInline); - if (Options.NoRedZone) - FlushF->addFnAttr(Attribute::NoRedZone); - - BasicBlock *Entry = BasicBlock::Create(*Ctx, "entry", FlushF); - - // Write out the current counters. - Function *WriteoutF = M->getFunction("__llvm_gcov_writeout"); - assert(WriteoutF && "Need to create the writeout function first!"); - - IRBuilder<> Builder(Entry); - Builder.CreateCall(WriteoutF, {}); - Builder.CreateCall(ResetF, {}); - - Type *RetTy = FlushF->getReturnType(); - if (RetTy->isVoidTy()) - Builder.CreateRetVoid(); - else if (RetTy->isIntegerTy()) - // Used if __llvm_gcov_flush was implicitly declared. 
- Builder.CreateRet(ConstantInt::get(RetTy, 0)); - else - report_fatal_error("invalid return type for __llvm_gcov_flush"); - - return FlushF; -} From 1ee88e6efe4e578c064c2c3779ca104478dfe136 Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Sat, 18 Jul 2020 22:21:29 +0000 Subject: [PATCH 738/771] Fix invalid link in the MLIR Standard Dialect www page (2nd attempt) --- mlir/include/mlir/Dialect/StandardOps/IR/Ops.td | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td index cde317065ffee..702b912d3103c 100644 --- a/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td +++ b/mlir/include/mlir/Dialect/StandardOps/IR/Ops.td @@ -1700,7 +1700,7 @@ def LoadOp : Std_Op<"load", In an `affine.if` or `affine.for` body, the indices of a load are restricted to SSA values bound to surrounding loop induction variables, - [symbols](AffineOps.md#dimensions-and-symbols), results of a + [symbols](Affine.md#dimensions-and-symbols), results of a [`constant` operation](#stdconstant-constantop), or the result of an `affine.apply` operation that can in turn take as arguments all of the aforementioned SSA values or the recursively result of such an From 3452a0d8c17f7166f479706b293caf6ac76ffd90 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 18 Jul 2020 20:26:01 -0700 Subject: [PATCH 739/771] [Driver] -B: don't search for target triple prefixes To match GCC (either crossing or not), which doesn't prepend target triple prefixes to `exec_prefixes`. As an example, powerpc64le-linux-gnu-gcc does not search "powerpc64le-linux-gnu-${name}" in a -B path. 
--- clang/lib/Driver/Driver.cpp | 8 +++----- clang/test/Driver/B-opt.c | 5 +++-- .../usr/bin/{armv7-windows-itanium-ld => ld} | 0 clang/test/Driver/fuse-ld.c | 8 +++----- clang/test/Driver/prefixed-tools.c | 4 ++-- clang/test/Driver/program-path-priority.c | 6 +++--- clang/test/Driver/windows-cross.c | 18 +++++++++--------- 7 files changed, 23 insertions(+), 26 deletions(-) rename clang/test/Driver/Inputs/Windows/ARM/8.1/usr/bin/{armv7-windows-itanium-ld => ld} (100%) diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index d2b6268d5fa35..7d52882f85325 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -4785,8 +4785,7 @@ void Driver::generatePrefixedToolNames( Names.emplace_back((DefaultTargetTriple + "-" + Tool).str()); } -static bool ScanDirForExecutable(SmallString<128> &Dir, - const std::string &Name) { +static bool ScanDirForExecutable(SmallString<128> &Dir, StringRef Name) { llvm::sys::path::append(Dir, Name); if (llvm::sys::fs::can_execute(Twine(Dir))) return true; @@ -4803,9 +4802,8 @@ std::string Driver::GetProgramPath(StringRef Name, const ToolChain &TC) const { for (const auto &PrefixDir : PrefixDirs) { if (llvm::sys::fs::is_directory(PrefixDir)) { SmallString<128> P(PrefixDir); - for (const auto &TargetSpecificExecutable : TargetSpecificExecutables) - if (ScanDirForExecutable(P, TargetSpecificExecutable)) - return std::string(P.str()); + if (ScanDirForExecutable(P, Name)) + return std::string(P.str()); } else { SmallString<128> P((PrefixDir + Name).str()); if (llvm::sys::fs::can_execute(Twine(P))) diff --git a/clang/test/Driver/B-opt.c b/clang/test/Driver/B-opt.c index 5e5ff42fd0956..df85dee4b7040 100644 --- a/clang/test/Driver/B-opt.c +++ b/clang/test/Driver/B-opt.c @@ -1,9 +1,10 @@ // Check -B driver option. -// + +/// Target triple prefix is not detected for -B. 
// RUN: %clang %s -### -o %t.o -target i386-unknown-linux \ // RUN: -B %S/Inputs/B_opt_tree/dir1 -fuse-ld=ld 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-B-OPT-TRIPLE %s -// CHECK-B-OPT-TRIPLE: "{{.*}}/Inputs/B_opt_tree/dir1{{/|\\\\}}i386-unknown-linux-ld" +// CHECK-B-OPT-TRIPLE-NOT: "{{.*}}/Inputs/B_opt_tree/dir1{{/|\\\\}}i386-unknown-linux-ld" // // RUN: %clang %s -### -o %t.o -target i386-unknown-linux \ // RUN: -B %S/Inputs/B_opt_tree/dir2 -fuse-ld=ld 2>&1 \ diff --git a/clang/test/Driver/Inputs/Windows/ARM/8.1/usr/bin/armv7-windows-itanium-ld b/clang/test/Driver/Inputs/Windows/ARM/8.1/usr/bin/ld similarity index 100% rename from clang/test/Driver/Inputs/Windows/ARM/8.1/usr/bin/armv7-windows-itanium-ld rename to clang/test/Driver/Inputs/Windows/ARM/8.1/usr/bin/ld diff --git a/clang/test/Driver/fuse-ld.c b/clang/test/Driver/fuse-ld.c index 13e709ccfdfa4..f2ca9fb36194e 100644 --- a/clang/test/Driver/fuse-ld.c +++ b/clang/test/Driver/fuse-ld.c @@ -31,23 +31,21 @@ // RUN: | FileCheck %s -check-prefix=CHECK-FREEBSD-PLIB // CHECK-FREEBSD-PLIB: error: invalid linker name - - // RUN: %clang %s -### -fuse-ld=ld \ // RUN: -target arm-linux-androideabi \ -// RUN: -B%S/Inputs/basic_android_tree/bin 2>&1 \ +// RUN: -B%S/Inputs/basic_android_tree/bin/arm-linux-androideabi- 2>&1 \ // RUN: | FileCheck %s --check-prefix=CHECK-ANDROID-ARM-LD // CHECK-ANDROID-ARM-LD: Inputs/basic_android_tree/bin{{/|\\+}}arm-linux-androideabi-ld // RUN: %clang %s -### -fuse-ld=bfd \ // RUN: -target arm-linux-androideabi \ -// RUN: -B%S/Inputs/basic_android_tree/bin 2>&1 \ +// RUN: -B%S/Inputs/basic_android_tree/bin/arm-linux-androideabi- 2>&1 \ // RUN: | FileCheck %s -check-prefix=CHECK-ANDROID-ARM-BFD // CHECK-ANDROID-ARM-BFD: Inputs/basic_android_tree/bin{{/|\\+}}arm-linux-androideabi-ld.bfd // RUN: %clang %s -### -fuse-ld=gold \ // RUN: -target arm-linux-androideabi \ -// RUN: -B%S/Inputs/basic_android_tree/bin 2>&1 \ +// RUN: -B%S/Inputs/basic_android_tree/bin/arm-linux-androideabi- 2>&1 \ 
// RUN: | FileCheck %s -check-prefix=CHECK-ANDROID-ARM-GOLD // CHECK-ANDROID-ARM-GOLD: Inputs/basic_android_tree/bin{{/|\\+}}arm-linux-androideabi-ld.gold diff --git a/clang/test/Driver/prefixed-tools.c b/clang/test/Driver/prefixed-tools.c index 63f7f29ae9635..0252a2f701436 100644 --- a/clang/test/Driver/prefixed-tools.c +++ b/clang/test/Driver/prefixed-tools.c @@ -1,8 +1,8 @@ -// RUN: %clang -### -B%S/Inputs/prefixed_tools_tree -o %t.o -no-integrated-as -fuse-ld=ld \ +// RUN: %clang -### -B%S/Inputs/prefixed_tools_tree/x86_64--linux- -o %t.o -no-integrated-as -fuse-ld=ld \ // RUN: -target x86_64--linux %s 2>&1 | \ // RUN: FileCheck --check-prefix=CHECK-M64 %s -// RUN: %clang -### -B%S/Inputs/prefixed_tools_tree -o %t.o -no-integrated-as -fuse-ld=ld \ +// RUN: %clang -### -B%S/Inputs/prefixed_tools_tree/x86_64--linux- -o %t.o -no-integrated-as -fuse-ld=ld \ // RUN: -m32 -target x86_64--linux %s 2>&1 | \ // RUN: FileCheck --check-prefix=CHECK-M32 %s diff --git a/clang/test/Driver/program-path-priority.c b/clang/test/Driver/program-path-priority.c index 9f1109f530c65..cba5f9f4d7430 100644 --- a/clang/test/Driver/program-path-priority.c +++ b/clang/test/Driver/program-path-priority.c @@ -119,15 +119,15 @@ // RUN: test -f %t/%target_triple-gcc && \ // RUN: mv %t/%target_triple-gcc %t/prefix || true // RUN: touch %t/notreal-none-elf-gcc && chmod +x %t/notreal-none-elf-gcc +// RUN: touch %t/prefix/gcc && chmod +x %t/prefix/gcc // RUN: env "PATH=" %t/clang -### -target notreal-none-elf %s -B %t/prefix 2>&1 | \ // RUN: FileCheck --check-prefix=DEFAULT_TRIPLE_IN_PREFIX %s -// DEFAULT_TRIPLE_IN_PREFIX: prefix/{{.*}}-gcc" +// DEFAULT_TRIPLE_IN_PREFIX: prefix/gcc" // DEFAULT_TRIPLE_IN_PREFIX-NOT: notreal-none-elf-gcc" /// Only if there is nothing in the prefix will we search other paths /// -f in case $DEFAULT_TRIPLE == %target_triple -// RUN: rm -f %t/prefix/$DEFAULT_TRIPLE-gcc -// RUN: rm -f %t/prefix/%target_triple-gcc +// RUN: rm -f %t/prefix/$DEFAULT_TRIPLE-gcc 
%t/prefix/%target_triple-gcc %t/prefix/gcc // RUN: env "PATH=" %t/clang -### -target notreal-none-elf %s -B %t/prefix 2>&1 | \ // RUN: FileCheck --check-prefix=EMPTY_PREFIX_DIR %s // EMPTY_PREFIX_DIR: notreal-none-elf-gcc" diff --git a/clang/test/Driver/windows-cross.c b/clang/test/Driver/windows-cross.c index d96b0283a90ec..96497da61c0d5 100644 --- a/clang/test/Driver/windows-cross.c +++ b/clang/test/Driver/windows-cross.c @@ -1,47 +1,47 @@ // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -stdlib=libstdc++ -rtlib=compiler-rt -o /dev/null %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-BASIC-LIBCXX -// CHECK-BASIC-LIBCXX: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{[^"]*}}.o" "-lmsvcrt" +// CHECK-BASIC-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{[^"]*}}.o" "-lmsvcrt" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -stdlib=libstdc++ -rtlib=compiler-rt -static -o /dev/null %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-STATIC -// CHECK-STATIC: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bstatic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{[^"]*}}.o" "-lmsvcrt" +// CHECK-STATIC: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bstatic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{[^"]*}}.o" "-lmsvcrt" // RUN: %clang -### -target armv7-windows-itanium --sysroot %s/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libstdc++ -o /dev/null %s 2>&1 \ // RUN: | 
FileCheck %s --check-prefix CHECK-RTLIB -// CHECK-RTLIB: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" +// CHECK-RTLIB: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libc++ -o /dev/null %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-C-LIBCXX -// CHECK-C-LIBCXX: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" +// CHECK-C-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" // RUN: %clangxx -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -rtlib=compiler-rt -stdlib=libc++ -o /dev/null %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-LIBCXX -// CHECK-LIBCXX: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lc++" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" +// CHECK-LIBCXX: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-Bdynamic" "--entry" "mainCRTStartup" "--allow-multiple-definition" "-o" "{{[^"]*}}" "{{.*}}.o" "-lc++" "-lmsvcrt" 
"{{.*[\\/]}}clang_rt.builtins-arm.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -o shared.dll %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SHARED -// CHECK-SHARED: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" +// CHECK-SHARED: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -static -o shared.dll %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-SHARED-STATIC -// CHECK-SHARED-STATIC: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bstatic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" +// CHECK-SHARED-STATIC: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bstatic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %s/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ 
-nostartfiles -o shared.dll %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-NOSTARTFILES -// CHECK-NOSTARTFILES: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" +// CHECK-NOSTARTFILES: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" "-lmsvcrt" "{{.*[\\/]}}clang_rt.builtins-arm.lib" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=ld -shared -rtlib=compiler-rt -stdlib=libc++ -nostartfiles -nodefaultlibs -o shared.dll %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-STANDALONE -// CHECK-STANDALONE: armv7-windows-itanium-ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" +// CHECK-STANDALONE: {{[/\\]}}ld" "--sysroot={{.*}}/Inputs/Windows/ARM/8.1" "-m" "thumb2pe" "-shared" "-Bdynamic" "--enable-auto-image-base" "--entry" "_DllMainCRTStartup" "--allow-multiple-definition" "-o" "shared.dll" "--out-implib" "shared.lib" "{{.*}}.o" // RUN: %clang -### -target armv7-windows-itanium --sysroot %S/Inputs/Windows/ARM/8.1 -B %S/Inputs/Windows/ARM/8.1/usr/bin -fuse-ld=lld-link2 -shared -o shared.dll -x c++ %s 2>&1 \ // RUN: | FileCheck %s --check-prefix CHECK-FUSE-LD From b2b39c5d455b950c6fffcc902924516fe7f8ec9f Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sat, 18 Jul 2020 21:01:41 -0700 Subject: [PATCH 740/771] [Driver] --print-search-dirs: print -B options and 
COMPILER_PATH --- clang/lib/Driver/Driver.cpp | 7 +++++++ clang/test/Driver/immediate-options.c | 4 ---- clang/test/Driver/print-search-dirs.c | 6 ++++++ 3 files changed, 13 insertions(+), 4 deletions(-) create mode 100644 clang/test/Driver/print-search-dirs.c diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 7d52882f85325..317098e248233 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -1748,6 +1748,13 @@ bool Driver::HandleImmediateArgs(const Compilation &C) { if (C.getArgs().hasArg(options::OPT_print_search_dirs)) { llvm::outs() << "programs: ="; bool separator = false; + // Print -B and COMPILER_PATH. + for (const std::string &Path : PrefixDirs) { + if (separator) + llvm::outs() << llvm::sys::EnvPathSeparator; + llvm::outs() << Path; + separator = true; + } for (const std::string &Path : TC.getProgramPaths()) { if (separator) llvm::outs() << llvm::sys::EnvPathSeparator; diff --git a/clang/test/Driver/immediate-options.c b/clang/test/Driver/immediate-options.c index 71494eec616f9..d7cd6be408016 100644 --- a/clang/test/Driver/immediate-options.c +++ b/clang/test/Driver/immediate-options.c @@ -9,10 +9,6 @@ // RUN: %clang -dumpversion | FileCheck %s -check-prefix=DUMPVERSION // DUMPVERSION: {{[0-9]+\.[0-9.]+}} -// RUN: %clang -print-search-dirs | FileCheck %s -check-prefix=PRINT-SEARCH-DIRS -// PRINT-SEARCH-DIRS: programs: ={{.*}} -// PRINT-SEARCH-DIRS: libraries: ={{.*}} - // Test if the -print-resource-dir option is accepted without error. // Allow unspecified output because the value of CLANG_RESOURCE_DIR is unknown. 
// RUN: %clang -print-resource-dir | FileCheck %s -check-prefix=PRINT-RESOURCE-DIR diff --git a/clang/test/Driver/print-search-dirs.c b/clang/test/Driver/print-search-dirs.c new file mode 100644 index 0000000000000..0ac13125c9a13 --- /dev/null +++ b/clang/test/Driver/print-search-dirs.c @@ -0,0 +1,6 @@ +// UNSUPPORTED: system-windows + +// RUN: env COMPILER_PATH=cpath1:cpath2 %clang %s -target x86_64-pc-freebsd --sysroot=%S/Inputs/basic_freebsd64_tree \ +// RUN: -B b1 -B b2 -print-search-dirs | FileCheck %s +// CHECK: programs: =b1:b2:cpath1:cpath2:{{.*}} +// CHECK-NEXT: libraries: ={{.*}}Inputs/basic_freebsd64_tree/usr/lib From 606e756bb10a52318bece68a8b844e06ce71641d Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Sat, 18 Jul 2020 22:26:37 -0700 Subject: [PATCH 741/771] [NewPM] make parsePassPipeline parse adaptor-wrapped user passes Currently, when parsing text pipeline, different kinds of passes always introduce nested pass managers. This makes it impossible to test the adaptor-wrapped user passes from the text pipeline interface which is needed by D82344 test cases. This also seems useful in general. See comments above `parsePassPipeline`. The syntax would be like mixing passes of different types, but it is not the same as inferring the correct pass type and then adding the matching nested pass managers. Strictly speaking, the resulted pipelines are different. 
Reviewed By: asbirlea, aeubanks Differential Revision: https://reviews.llvm.org/D82698 --- llvm/include/llvm/Passes/PassBuilder.h | 19 ++++-- llvm/lib/Passes/PassBuilder.cpp | 82 ++++++++++++++++++++++++ llvm/test/Other/pass-pipeline-parsing.ll | 31 +++++++++ 3 files changed, 128 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/Passes/PassBuilder.h b/llvm/include/llvm/Passes/PassBuilder.h index 0357e4a2fc058..b0703457656b2 100644 --- a/llvm/include/llvm/Passes/PassBuilder.h +++ b/llvm/include/llvm/Passes/PassBuilder.h @@ -472,10 +472,21 @@ class PassBuilder { /// module(function(loop(lpass1,lpass2,lpass3))) /// /// This shortcut is especially useful for debugging and testing small pass - /// combinations. Note that these shortcuts don't introduce any other magic. - /// If the sequence of passes aren't all the exact same kind of pass, it will - /// be an error. You cannot mix different levels implicitly, you must - /// explicitly form a pass manager in which to nest passes. + /// combinations. + /// + /// The sequence of passes aren't necessarily the exact same kind of pass. + /// You can mix different levels implicitly if adaptor passes are defined to + /// make them work. For example, + /// + /// mpass1,fpass1,fpass2,mpass2,lpass1 + /// + /// This pipeline uses only one pass manager: the top-level module manager. + /// fpass1,fpass2 and lpass1 are added into the the top-level module manager + /// using only adaptor passes. No nested function/loop pass managers are + /// added. The purpose is to allow easy pass testing when the user + /// specifically want the pass to run under a adaptor directly. This is + /// preferred when a pipeline is largely of one type, but one or just a few + /// passes are of different types(See PassBuilder.cpp for examples). 
Error parsePassPipeline(ModulePassManager &MPM, StringRef PipelineText, bool VerifyEachPass = true, bool DebugLogging = false); diff --git a/llvm/lib/Passes/PassBuilder.cpp b/llvm/lib/Passes/PassBuilder.cpp index 4db7bebcb77ce..1766e579c33d6 100644 --- a/llvm/lib/Passes/PassBuilder.cpp +++ b/llvm/lib/Passes/PassBuilder.cpp @@ -2212,6 +2212,40 @@ Error PassBuilder::parseModulePass(ModulePassManager &MPM, std::remove_reference::type>()); \ return Error::success(); \ } +#define CGSCC_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + MPM.addPass(createModuleToPostOrderCGSCCPassAdaptor(CREATE_PASS)); \ + return Error::success(); \ + } +#define FUNCTION_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS)); \ + return Error::success(); \ + } +#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + MPM.addPass(createModuleToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \ + return Error::success(); \ + } +#define LOOP_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + MPM.addPass(createModuleToFunctionPassAdaptor( \ + createFunctionToLoopPassAdaptor(CREATE_PASS, false, DebugLogging))); \ + return Error::success(); \ + } +#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + MPM.addPass( \ + createModuleToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \ + CREATE_PASS(Params.get()), false, DebugLogging))); \ + return Error::success(); \ + } #include "PassRegistry.def" for (auto &C : ModulePipelineParsingCallbacks) @@ -2295,6 +2329,35 @@ Error PassBuilder::parseCGSCCPass(CGSCCPassManager &CGPM, std::remove_reference::type>()); \ return Error::success(); \ } +#define FUNCTION_PASS(NAME, 
CREATE_PASS) \ + if (Name == NAME) { \ + CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS)); \ + return Error::success(); \ + } +#define FUNCTION_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + CGPM.addPass(createCGSCCToFunctionPassAdaptor(CREATE_PASS(Params.get()))); \ + return Error::success(); \ + } +#define LOOP_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + CGPM.addPass(createCGSCCToFunctionPassAdaptor( \ + createFunctionToLoopPassAdaptor(CREATE_PASS, false, DebugLogging))); \ + return Error::success(); \ + } +#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + CGPM.addPass( \ + createCGSCCToFunctionPassAdaptor(createFunctionToLoopPassAdaptor( \ + CREATE_PASS(Params.get()), false, DebugLogging))); \ + return Error::success(); \ + } #include "PassRegistry.def" for (auto &C : CGSCCPipelineParsingCallbacks) @@ -2378,6 +2441,25 @@ Error PassBuilder::parseFunctionPass(FunctionPassManager &FPM, std::remove_reference::type>()); \ return Error::success(); \ } +// FIXME: UseMemorySSA is set to false. Maybe we could do things like: +// bool UseMemorySSA = !("canon-freeze" || "loop-predication" || +// "guard-widening"); +// The risk is that it may become obsolete if we're not careful. 
+#define LOOP_PASS(NAME, CREATE_PASS) \ + if (Name == NAME) { \ + FPM.addPass( \ + createFunctionToLoopPassAdaptor(CREATE_PASS, false, DebugLogging)); \ + return Error::success(); \ + } +#define LOOP_PASS_WITH_PARAMS(NAME, CREATE_PASS, PARSER) \ + if (checkParametrizedPassName(Name, NAME)) { \ + auto Params = parsePassParameters(PARSER, Name, NAME); \ + if (!Params) \ + return Params.takeError(); \ + FPM.addPass(createFunctionToLoopPassAdaptor(CREATE_PASS(Params.get()), \ + false, DebugLogging)); \ + return Error::success(); \ + } #include "PassRegistry.def" for (auto &C : FunctionPipelineParsingCallbacks) diff --git a/llvm/test/Other/pass-pipeline-parsing.ll b/llvm/test/Other/pass-pipeline-parsing.ll index 2e8bc7c873025..902bd9b3eabb1 100644 --- a/llvm/test/Other/pass-pipeline-parsing.ll +++ b/llvm/test/Other/pass-pipeline-parsing.ll @@ -173,6 +173,37 @@ ; CHECK-NESTED-FP-LP: Finished llvm::Function pass manager run ; CHECK-NESTED-FP-LP: Finished llvm::Module pass manager run +; RUN: opt -disable-output -debug-pass-manager \ +; RUN: -passes='module(no-op-function,no-op-loop,no-op-cgscc,cgscc(no-op-function,no-op-loop),function(no-op-loop))' %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-ADAPTORS +; CHECK-ADAPTORS: Starting llvm::Module pass manager run +; CHECK-ADAPTORS: Starting llvm::Module pass manager run +; CHECK-ADAPTORS: Running pass: ModuleToFunctionPassAdaptor<{{.*}}NoOpFunctionPass> +; CHECK-ADAPTORS: Running pass: ModuleToFunctionPassAdaptor<{{.*}}FunctionToLoopPassAdaptor<{{.*}}NoOpLoopPass>{{.*}}> +; CHECK-ADAPTORS: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}NoOpCGSCCPass> +; CHECK-ADAPTORS: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> +; CHECK-ADAPTORS: Starting CGSCC pass manager run +; CHECK-ADAPTORS: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}NoOpFunctionPass> +; CHECK-ADAPTORS: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}FunctionToLoopPassAdaptor<{{.*}}NoOpLoopPass>{{.*}}> +; CHECK-ADAPTORS: 
Finished CGSCC pass manager run +; CHECK-ADAPTORS: Running pass: ModuleToFunctionPassAdaptor<{{.*}}PassManager{{.*}}> +; CHECK-ADAPTORS: Starting llvm::Function pass manager run +; CHECK-ADAPTORS: Running pass: FunctionToLoopPassAdaptor<{{.*}}NoOpLoopPass> +; CHECK-ADAPTORS: Finished llvm::Function pass manager run +; CHECK-ADAPTORS: Finished llvm::Module pass manager run +; CHECK-ADAPTORS: Finished llvm::Module pass manager run + +; RUN: opt -disable-output -debug-pass-manager \ +; RUN: -passes='cgscc(print)' %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-PRINT-IN-CGSCC +; CHECK-PRINT-IN-CGSCC: Starting llvm::Module pass manager run +; CHECK-PRINT-IN-CGSCC: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}LazyCallGraph{{.*}}> +; CHECK-PRINT-IN-CGSCC: Starting CGSCC pass manager run +; CHECK-PRINT-IN-CGSCC: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PrintFunctionPass> +; CHECK-PRINT-IN-CGSCC: Finished CGSCC pass manager run +; CHECK-PRINT-IN-CGSCC: Running pass: VerifierPass +; CHECK-PRINT-IN-CGSCC: Finished llvm::Module pass manager run + ; RUN: not opt -disable-output -debug-pass-manager \ ; RUN: -passes='function(no-op-function)function(no-op-function)' %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-MISSING-COMMA1 From af4c8730924fb6617494c223dac62d6c72c97c6f Mon Sep 17 00:00:00 2001 From: Yuanfang Chen Date: Sat, 18 Jul 2020 22:23:07 -0700 Subject: [PATCH 742/771] [NewPM] Allow passes to never be skipped A pass declares itself unskippable by defining a method `static bool isRequired()`. Also, this patch makes pass managers and adaptor passes required (unskippable). PassInstrumentation before-pass-callbacks could be used to skip passes by returning false. However, some passes should not be skipped at all. Especially so for special-purpose passes such as pass managers and adaptor passes since if they are skipped for any reason, the passes contained by them would also be skipped ignoring contained passes's return value of `isRequired()`. 
Reviewed By: aeubanks Differential Revision: https://reviews.llvm.org/D82344 --- llvm/include/llvm/Analysis/CGSCCPassManager.h | 4 ++ llvm/include/llvm/IR/PassInstrumentation.h | 21 +++++++ llvm/include/llvm/IR/PassManager.h | 4 ++ llvm/include/llvm/IR/PassManagerInternal.h | 22 +++++++ .../llvm/Transforms/Scalar/LoopPassManager.h | 2 + .../unittests/IR/PassBuilderCallbacksTest.cpp | 63 +++++++++++++++++-- 6 files changed, 111 insertions(+), 5 deletions(-) diff --git a/llvm/include/llvm/Analysis/CGSCCPassManager.h b/llvm/include/llvm/Analysis/CGSCCPassManager.h index eb0d3ae8fedfc..e70af71b3da67 100644 --- a/llvm/include/llvm/Analysis/CGSCCPassManager.h +++ b/llvm/include/llvm/Analysis/CGSCCPassManager.h @@ -355,6 +355,8 @@ class ModuleToPostOrderCGSCCPassAdaptor /// Runs the CGSCC pass across every SCC in the module. PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM); + static bool isRequired() { return true; } + private: CGSCCPassT Pass; }; @@ -543,6 +545,8 @@ class CGSCCToFunctionPassAdaptor return PA; } + static bool isRequired() { return true; } + private: FunctionPassT Pass; }; diff --git a/llvm/include/llvm/IR/PassInstrumentation.h b/llvm/include/llvm/IR/PassInstrumentation.h index bcc434548e670..37390e4e682ba 100644 --- a/llvm/include/llvm/IR/PassInstrumentation.h +++ b/llvm/include/llvm/IR/PassInstrumentation.h @@ -129,6 +129,26 @@ class PassInstrumentationCallbacks { class PassInstrumentation { PassInstrumentationCallbacks *Callbacks; + // Template argument PassT of PassInstrumentation::runBeforePass could be two + // kinds: (1) a regular pass inherited from PassInfoMixin (happen when + // creating a adaptor pass for a regular pass); (2) a type-erased PassConcept + // created from (1). Here we want to make case (1) skippable unconditionally + // since they are regular passes. We call PassConcept::isRequired to decide + // for case (2). 
+ template + using has_required_t = decltype(std::declval().isRequired()); + + template + static std::enable_if_t::value, bool> + isRequired(const PassT &Pass) { + return Pass.isRequired(); + } + template + static std::enable_if_t::value, bool> + isRequired(const PassT &Pass) { + return false; + } + public: /// Callbacks object is not owned by PassInstrumentation, its life-time /// should at least match the life-time of corresponding @@ -148,6 +168,7 @@ class PassInstrumentation { bool ShouldRun = true; for (auto &C : Callbacks->BeforePassCallbacks) ShouldRun &= C(Pass.name(), llvm::Any(&IR)); + ShouldRun = ShouldRun || isRequired(Pass); return ShouldRun; } diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h index 4d5f292ba9a13..f503871e23609 100644 --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -559,6 +559,8 @@ class PassManager : public PassInfoMixin< Passes.emplace_back(new PassModelT(std::move(Pass))); } + static bool isRequired() { return true; } + private: using PassConceptT = detail::PassConcept; @@ -1260,6 +1262,8 @@ class ModuleToFunctionPassAdaptor return PA; } + static bool isRequired() { return true; } + private: FunctionPassT Pass; }; diff --git a/llvm/include/llvm/IR/PassManagerInternal.h b/llvm/include/llvm/IR/PassManagerInternal.h index c602c0b5cc20a..986ed0b5a7ac6 100644 --- a/llvm/include/llvm/IR/PassManagerInternal.h +++ b/llvm/include/llvm/IR/PassManagerInternal.h @@ -48,6 +48,12 @@ struct PassConcept { /// Polymorphic method to access the name of a pass. virtual StringRef name() const = 0; + + /// Polymorphic method to to let a pass optionally exempted from skipping by + /// PassInstrumentation. + /// To opt-in, pass should implement `static bool isRequired()`. It's no-op + /// to have `isRequired` always return false since that is the default. + virtual bool isRequired() const = 0; }; /// A template wrapper used to implement the polymorphic API. 
@@ -81,6 +87,22 @@ struct PassModel : PassConcept { StringRef name() const override { return PassT::name(); } + template + using has_required_t = decltype(std::declval().isRequired()); + + template + static std::enable_if_t::value, bool> + passIsRequiredImpl() { + return T::isRequired(); + } + template + static std::enable_if_t::value, bool> + passIsRequiredImpl() { + return false; + } + + bool isRequired() const override { return passIsRequiredImpl(); } + PassT Pass; }; diff --git a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h index 9b2f0fcab95be..aff80ef1dcfab 100644 --- a/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h +++ b/llvm/include/llvm/Transforms/Scalar/LoopPassManager.h @@ -366,6 +366,8 @@ class FunctionToLoopPassAdaptor return PA; } + static bool isRequired() { return true; } + private: LoopPassT Pass; diff --git a/llvm/unittests/IR/PassBuilderCallbacksTest.cpp b/llvm/unittests/IR/PassBuilderCallbacksTest.cpp index 1f17a4f34b18e..c2c15069f4132 100644 --- a/llvm/unittests/IR/PassBuilderCallbacksTest.cpp +++ b/llvm/unittests/IR/PassBuilderCallbacksTest.cpp @@ -524,10 +524,10 @@ TEST_F(ModuleCallbacksTest, InstrumentedSkippedPasses) { // Non-mock instrumentation run here can safely be ignored. CallbacksHandle.ignoreNonMockPassInstrumentation(""); - // Skip the pass by returning false. - EXPECT_CALL(CallbacksHandle, runBeforePass(HasNameRegex("MockPassHandle"), - HasName(""))) - .WillOnce(Return(false)); + // Skip all passes by returning false. Pass managers and adaptor passes are + // also passes that observed by the callbacks. 
+ EXPECT_CALL(CallbacksHandle, runBeforePass(_, _)) + .WillRepeatedly(Return(false)); EXPECT_CALL(AnalysisHandle, run(HasName(""), _)).Times(0); EXPECT_CALL(PassHandle, run(HasName(""), _)).Times(0); @@ -543,7 +543,60 @@ TEST_F(ModuleCallbacksTest, InstrumentedSkippedPasses) { runAfterAnalysis(HasNameRegex("MockAnalysisHandle"), _)) .Times(0); - StringRef PipelineText = "test-transform"; + // Order is important here. `Adaptor` expectations should be checked first + // because the its argument contains 'PassManager' (for example: + // ModuleToFunctionPassAdaptor{{.*}}PassManager{{.*}}). Here only check + // `runAfterPass` to show that they are not skipped. + + // Pass managers are not ignored. + // 5 = (1) ModulePassManager + (2) FunctionPassMangers + (1) LoopPassManager + + // (1) CGSCCPassManager + EXPECT_CALL(CallbacksHandle, runAfterPass(HasNameRegex("PassManager"), _)) + .Times(5); + EXPECT_CALL(CallbacksHandle, + runAfterPass(HasNameRegex("ModuleToFunctionPassAdaptor"), _)) + .Times(1); + EXPECT_CALL( + CallbacksHandle, + runAfterPass(HasNameRegex("ModuleToPostOrderCGSCCPassAdaptor"), _)) + .Times(1); + EXPECT_CALL(CallbacksHandle, + runAfterPass(HasNameRegex("CGSCCToFunctionPassAdaptor"), _)) + .Times(1); + EXPECT_CALL(CallbacksHandle, + runAfterPass(HasNameRegex("FunctionToLoopPassAdaptor"), _)) + .Times(1); + + // Ignore analyses introduced by adaptor passes. 
+ EXPECT_CALL(CallbacksHandle, + runBeforeAnalysis(Not(HasNameRegex("MockAnalysisHandle")), _)) + .Times(AnyNumber()); + EXPECT_CALL(CallbacksHandle, + runAfterAnalysis(Not(HasNameRegex("MockAnalysisHandle")), _)) + .Times(AnyNumber()); + + // Register Funtion and Loop version of "test-transform" for testing + PB.registerPipelineParsingCallback( + [](StringRef Name, FunctionPassManager &FPM, + ArrayRef) { + if (Name == "test-transform") { + FPM.addPass(MockPassHandle().getPass()); + return true; + } + return false; + }); + PB.registerPipelineParsingCallback( + [](StringRef Name, LoopPassManager &LPM, + ArrayRef) { + if (Name == "test-transform") { + LPM.addPass(MockPassHandle().getPass()); + return true; + } + return false; + }); + + StringRef PipelineText = "test-transform,function(test-transform),cgscc(" + "function(loop(test-transform)))"; ASSERT_THAT_ERROR(PB.parsePassPipeline(PM, PipelineText, true), Succeeded()) << "Pipeline was: " << PipelineText; From d37befdfe5cd93666f8077c32836673ad70385d1 Mon Sep 17 00:00:00 2001 From: Kang Zhang Date: Sun, 19 Jul 2020 07:01:45 +0000 Subject: [PATCH 743/771] [PowerPC] Remove the redundant implicit operands in ppc-early-ret pass Summary: In the `ppc-early-ret` pass, we have use `BuildMI` and `copyImplicitOps` when the branch instructions can do the early return. But the two functions will add implicit operands twice, this is not correct. This patch is to remove the redundant implicit operands in `ppc-early-ret pass`. 
Reviewed By: jsji Differential Revision: https://reviews.llvm.org/D76042 --- llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp | 26 ++++++++++++------- .../test/CodeGen/PowerPC/early-ret-verify.mir | 2 +- llvm/test/CodeGen/PowerPC/early-ret.mir | 8 +++--- 3 files changed, 21 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp index c9f74bbf861ca..08b7bdb3ac1e2 100644 --- a/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp +++ b/llvm/lib/Target/PowerPC/PPCEarlyReturn.cpp @@ -77,8 +77,9 @@ namespace { if (J->getOperand(0).getMBB() == &ReturnMBB) { // This is an unconditional branch to the return. Replace the // branch with a blr. - BuildMI(**PI, J, J->getDebugLoc(), TII->get(I->getOpcode())) - .copyImplicitOps(*I); + MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I); + (*PI)->insert(J, MI); + MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -89,10 +90,13 @@ namespace { if (J->getOperand(2).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. - BuildMI(**PI, J, J->getDebugLoc(), TII->get(PPC::BCCLR)) + MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I); + MI->setDesc(TII->get(PPC::BCCLR)); + MachineInstrBuilder(*ReturnMBB.getParent(), MI) .add(J->getOperand(0)) - .add(J->getOperand(1)) - .copyImplicitOps(*I); + .add(J->getOperand(1)); + (*PI)->insert(J, MI); + MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; @@ -103,11 +107,13 @@ namespace { if (J->getOperand(1).getMBB() == &ReturnMBB) { // This is a conditional branch to the return. Replace the branch // with a bclr. - BuildMI( - **PI, J, J->getDebugLoc(), - TII->get(J->getOpcode() == PPC::BC ? PPC::BCLR : PPC::BCLRn)) - .add(J->getOperand(0)) - .copyImplicitOps(*I); + MachineInstr *MI = ReturnMBB.getParent()->CloneMachineInstr(&*I); + MI->setDesc( + TII->get(J->getOpcode() == PPC::BC ? 
PPC::BCLR : PPC::BCLRn)); + MachineInstrBuilder(*ReturnMBB.getParent(), MI) + .add(J->getOperand(0)); + (*PI)->insert(J, MI); + MachineBasicBlock::iterator K = J--; K->eraseFromParent(); BlockChanged = true; diff --git a/llvm/test/CodeGen/PowerPC/early-ret-verify.mir b/llvm/test/CodeGen/PowerPC/early-ret-verify.mir index 5a01aa4effa5a..967e53302607f 100644 --- a/llvm/test/CodeGen/PowerPC/early-ret-verify.mir +++ b/llvm/test/CodeGen/PowerPC/early-ret-verify.mir @@ -40,7 +40,7 @@ body: | ; CHECK-LABEL: testEarlyRet ; CHECK: bb.0.entry: - ; CHECK: BCLR undef renamable $cr5lt, implicit $lr, implicit $rm, implicit $lr, implicit $rm + ; CHECK: BCLR undef renamable $cr5lt, implicit $lr, implicit $rm ; CHECK: bb.1: ; CHECK: renamable $r3 = IMPLICIT_DEF ; CHECK: renamable $r4 = IMPLICIT_DEF diff --git a/llvm/test/CodeGen/PowerPC/early-ret.mir b/llvm/test/CodeGen/PowerPC/early-ret.mir index bfbaa8edeb972..9bc97695bb65c 100644 --- a/llvm/test/CodeGen/PowerPC/early-ret.mir +++ b/llvm/test/CodeGen/PowerPC/early-ret.mir @@ -27,7 +27,7 @@ body: | ; CHECK: bb.0.entry: ; CHECK: renamable $cr0 = CMPWI renamable $r3, 0 ; CHECK: BC killed renamable $cr0gt, %bb.1 - ; CHECK: BLR implicit $lr, implicit $rm, implicit $lr, implicit $rm, implicit killed $r3 + ; CHECK: BLR implicit $lr, implicit $rm, implicit killed $r3 ; CHECK: bb.1.entry: ; CHECK: renamable $r3 = ADDI killed renamable $r4, 0 ; CHECK: BLR implicit $lr, implicit $rm, implicit killed $r3 @@ -106,7 +106,7 @@ body: | ; CHECK-LABEL: name: testBCLR ; CHECK: bb.0.entry: ; CHECK: renamable $cr0 = FCMPUS killed renamable $f3, killed renamable $f4 - ; CHECK: BCLR killed renamable $cr0eq, implicit $lr, implicit $rm, implicit $lr, implicit $rm, implicit killed $v2 + ; CHECK: BCLR killed renamable $cr0eq, implicit $lr, implicit $rm, implicit killed $v2 ; CHECK: bb.1.entry: ; CHECK: renamable $cr0 = FCMPUS killed renamable $f1, killed renamable $f2 ; CHECK: BCLRn killed renamable $cr0eq, implicit $lr, implicit $rm, implicit killed $v2 @@ 
-139,8 +139,8 @@ body: | ; CHECK: bb.0.entry: ; CHECK: renamable $r4 = LI 0 ; CHECK: renamable $cr0 = CMPLWI killed renamable $r4, 0 - ; CHECK: BCCLR 68, renamable $cr0, implicit $lr, implicit $rm, implicit $lr, implicit $rm + ; CHECK: BCCLR 68, renamable $cr0, implicit $lr, implicit $rm ; CHECK: bb.1: - ; CHECK: BCCLR 68, killed renamable $cr0, implicit $lr, implicit $rm, implicit $lr, implicit $rm + ; CHECK: BCCLR 68, killed renamable $cr0, implicit $lr, implicit $rm ; CHECK: BLR implicit $lr, implicit $rm ... From 2e74b6d80f347203ae17ec8b09e6b3e86ff6c179 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Sun, 19 Jul 2020 00:26:01 -0700 Subject: [PATCH 744/771] [llvm-cov gcov] Don't require NUL terminator when reading files .gcno, .gcda and source files can be modified while we are reading them. If the concurrent modification of a file being read nullifies the NUL terminator assumption, llvm-cov can trip over an assertion failure in MemoryBuffer::init. This is not so rare - the source files can be in an editor and .gcda can be written by an running process (if the process forks, when .gcda gets written is probably more unpredictable). There is no accompanying test because an assertion failure requires data races with some involved setting. --- llvm/lib/ProfileData/GCOV.cpp | 5 ++++- llvm/tools/llvm-cov/gcov.cpp | 6 ++++-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/llvm/lib/ProfileData/GCOV.cpp b/llvm/lib/ProfileData/GCOV.cpp index 71ea44a1a722d..7b97723da60cc 100644 --- a/llvm/lib/ProfileData/GCOV.cpp +++ b/llvm/lib/ProfileData/GCOV.cpp @@ -522,8 +522,11 @@ class LineConsumer { public: LineConsumer() = default; LineConsumer(StringRef Filename) { + // Open source files without requiring a NUL terminator. The concurrent + // modification may nullify the NUL terminator condition. 
ErrorOr> BufferOrErr = - MemoryBuffer::getFileOrSTDIN(Filename); + MemoryBuffer::getFileOrSTDIN(Filename, -1, + /*RequiresNullTerminator=*/false); if (std::error_code EC = BufferOrErr.getError()) { errs() << Filename << ": " << EC.message() << "\n"; Remaining = ""; diff --git a/llvm/tools/llvm-cov/gcov.cpp b/llvm/tools/llvm-cov/gcov.cpp index 7a1dbbfe9338b..d99e792c68a95 100644 --- a/llvm/tools/llvm-cov/gcov.cpp +++ b/llvm/tools/llvm-cov/gcov.cpp @@ -43,8 +43,10 @@ static void reportCoverage(StringRef SourceFile, StringRef ObjectDir, : InputGCDA; GCOVFile GF; + // Open .gcda and .gcda without requiring a NUL terminator. The concurrent + // modification may nullify the NUL terminator condition. ErrorOr> GCNO_Buff = - MemoryBuffer::getFileOrSTDIN(GCNO); + MemoryBuffer::getFileOrSTDIN(GCNO, -1, /*RequiresNullTerminator=*/false); if (std::error_code EC = GCNO_Buff.getError()) { errs() << GCNO << ": " << EC.message() << "\n"; return; @@ -56,7 +58,7 @@ static void reportCoverage(StringRef SourceFile, StringRef ObjectDir, } ErrorOr> GCDA_Buff = - MemoryBuffer::getFileOrSTDIN(GCDA); + MemoryBuffer::getFileOrSTDIN(GCDA, -1, /*RequiresNullTerminator=*/false); if (std::error_code EC = GCDA_Buff.getError()) { if (EC != errc::no_such_file_or_directory) { errs() << GCDA << ": " << EC.message() << "\n"; From 3504acc33e8b9ddd7de71d75cdc329a14dbf58d9 Mon Sep 17 00:00:00 2001 From: David Green Date: Thu, 16 Jul 2020 10:36:23 +0100 Subject: [PATCH 745/771] [ARM] Don't mark vctp as having sideeffects As far as I can tell, it should not be necessary for VCTP to be unpredictable in tail predicated loops. Either it has a a valid loop counter as a operand which will naturally keep it in the right loop, or it doesn't and it won't be converted to a tail predicated loop. Not marking it as having side effects allows it to be scheduled more cleanly for cases where it is not expected to become a tail predicate loop. 
Differential Revision: https://reviews.llvm.org/D83907 --- llvm/lib/Target/ARM/ARMInstrMVE.td | 1 - .../CodeGen/Thumb2/mve-float16regloops.ll | 34 +++++++++---------- llvm/test/CodeGen/Thumb2/mve-vctp.ll | 6 ++-- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/llvm/lib/Target/ARM/ARMInstrMVE.td b/llvm/lib/Target/ARM/ARMInstrMVE.td index 2a1f50d97e3b3..d6e725084d7f9 100644 --- a/llvm/lib/Target/ARM/ARMInstrMVE.td +++ b/llvm/lib/Target/ARM/ARMInstrMVE.td @@ -5605,7 +5605,6 @@ def MVE_VDWDUPu8 : MVE_VxWDUP<"vdwdup", "u8", 0b00, 0b1>; def MVE_VDWDUPu16 : MVE_VxWDUP<"vdwdup", "u16", 0b01, 0b1>; def MVE_VDWDUPu32 : MVE_VxWDUP<"vdwdup", "u32", 0b10, 0b1>; -let hasSideEffects = 1 in class MVE_VCTPInst size, list pattern=[]> : MVE_p<(outs VCCR:$P0), (ins rGPR:$Rn), NoItinerary, "vctp", suffix, "$Rn", vpred_n, "", pattern> { diff --git a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll index 42e8cc91ede8e..e8ab7792b6dfc 100644 --- a/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll +++ b/llvm/test/CodeGen/Thumb2/mve-float16regloops.ll @@ -796,23 +796,23 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl ; CHECK-NEXT: .pad #16 ; CHECK-NEXT: sub sp, #16 ; CHECK-NEXT: ldrh r5, [r0] -; CHECK-NEXT: ldr.w r12, [r0, #4] +; CHECK-NEXT: ldr.w r9, [r0, #4] ; CHECK-NEXT: subs r6, r5, #1 ; CHECK-NEXT: cmp r6, #3 ; CHECK-NEXT: bhi .LBB15_6 ; CHECK-NEXT: @ %bb.1: @ %if.then ; CHECK-NEXT: ldr r7, [r0, #8] -; CHECK-NEXT: add.w r4, r12, r6, lsl #1 +; CHECK-NEXT: add.w r4, r9, r6, lsl #1 ; CHECK-NEXT: lsr.w lr, r3, #2 ; CHECK-NEXT: ldrh.w r8, [r7, #6] -; CHECK-NEXT: ldrh.w r9, [r7, #4] +; CHECK-NEXT: ldrh.w r12, [r7, #4] ; CHECK-NEXT: ldrh r6, [r7, #2] ; CHECK-NEXT: ldrh r7, [r7] ; CHECK-NEXT: wls lr, lr, .LBB15_5 ; CHECK-NEXT: @ %bb.2: @ %while.body.lr.ph ; CHECK-NEXT: str r5, [sp, #12] @ 4-byte Spill ; CHECK-NEXT: bic r5, r3, #3 -; CHECK-NEXT: add.w r10, r12, #2 +; CHECK-NEXT: add.w r10, r9, #2 ; 
CHECK-NEXT: str r5, [sp] @ 4-byte Spill ; CHECK-NEXT: add.w r5, r2, r5, lsl #1 ; CHECK-NEXT: str r5, [sp, #4] @ 4-byte Spill @@ -828,7 +828,7 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl ; CHECK-NEXT: vmul.f16 q0, q0, r7 ; CHECK-NEXT: vfma.f16 q0, q1, r6 ; CHECK-NEXT: vldrw.u32 q1, [r5] -; CHECK-NEXT: vfma.f16 q0, q1, r9 +; CHECK-NEXT: vfma.f16 q0, q1, r12 ; CHECK-NEXT: vldrw.u32 q1, [r10, #4] ; CHECK-NEXT: add.w r10, r10, #8 ; CHECK-NEXT: vfma.f16 q0, q1, r8 @@ -838,7 +838,7 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl ; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload ; CHECK-NEXT: ldr r1, [sp, #8] @ 4-byte Reload ; CHECK-NEXT: ldr r5, [sp, #12] @ 4-byte Reload -; CHECK-NEXT: add.w r12, r12, r2, lsl #1 +; CHECK-NEXT: add.w r9, r9, r2, lsl #1 ; CHECK-NEXT: add.w r1, r1, r2, lsl #1 ; CHECK-NEXT: ldr r2, [sp, #4] @ 4-byte Reload ; CHECK-NEXT: .LBB15_5: @ %while.end @@ -847,35 +847,35 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl ; CHECK-NEXT: vctp.16 lr ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q0, [r4] -; CHECK-NEXT: vldrw.u32 q0, [r12] -; CHECK-NEXT: add.w r1, r12, #2 +; CHECK-NEXT: vldrw.u32 q0, [r9] +; CHECK-NEXT: add.w r1, r9, #2 ; CHECK-NEXT: vldrw.u32 q1, [r1] -; CHECK-NEXT: add.w r1, r12, #6 +; CHECK-NEXT: add.w r1, r9, #6 ; CHECK-NEXT: vmul.f16 q0, q0, r7 ; CHECK-NEXT: vfma.f16 q0, q1, r6 -; CHECK-NEXT: vldrw.u32 q1, [r12, #4] -; CHECK-NEXT: vfma.f16 q0, q1, r9 +; CHECK-NEXT: vldrw.u32 q1, [r9, #4] +; CHECK-NEXT: vfma.f16 q0, q1, r12 ; CHECK-NEXT: vldrw.u32 q1, [r1] ; CHECK-NEXT: vfma.f16 q0, q1, r8 ; CHECK-NEXT: vpst ; CHECK-NEXT: vstrht.16 q0, [r2] -; CHECK-NEXT: ldr.w r12, [r0, #4] +; CHECK-NEXT: ldr.w r9, [r0, #4] ; CHECK-NEXT: .LBB15_6: @ %if.end -; CHECK-NEXT: add.w r0, r12, r3, lsl #1 +; CHECK-NEXT: add.w r0, r9, r3, lsl #1 ; CHECK-NEXT: lsr.w lr, r5, #2 ; CHECK-NEXT: wls lr, lr, .LBB15_10 ; CHECK-NEXT: @ %bb.7: @ %while.body51.preheader ; 
CHECK-NEXT: bic r2, r5, #3 ; CHECK-NEXT: adds r1, r2, r3 -; CHECK-NEXT: mov r3, r12 -; CHECK-NEXT: add.w r1, r12, r1, lsl #1 +; CHECK-NEXT: mov r3, r9 +; CHECK-NEXT: add.w r1, r9, r1, lsl #1 ; CHECK-NEXT: .LBB15_8: @ %while.body51 ; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1 ; CHECK-NEXT: vldrw.u32 q0, [r0], #8 ; CHECK-NEXT: vstrb.8 q0, [r3], #8 ; CHECK-NEXT: le lr, .LBB15_8 ; CHECK-NEXT: @ %bb.9: @ %while.end55.loopexit -; CHECK-NEXT: add.w r12, r12, r2, lsl #1 +; CHECK-NEXT: add.w r9, r9, r2, lsl #1 ; CHECK-NEXT: mov r0, r1 ; CHECK-NEXT: .LBB15_10: @ %while.end55 ; CHECK-NEXT: ands r1, r5, #3 @@ -884,7 +884,7 @@ define void @arm_fir_f32_1_4_mve(%struct.arm_fir_instance_f32* nocapture readonl ; CHECK-NEXT: vldrw.u32 q0, [r0] ; CHECK-NEXT: vctp.16 r1 ; CHECK-NEXT: vpst -; CHECK-NEXT: vstrht.16 q0, [r12] +; CHECK-NEXT: vstrht.16 q0, [r9] ; CHECK-NEXT: .LBB15_12: @ %if.end61 ; CHECK-NEXT: add sp, #16 ; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc} diff --git a/llvm/test/CodeGen/Thumb2/mve-vctp.ll b/llvm/test/CodeGen/Thumb2/mve-vctp.ll index d6e4d492f5351..67bc161e02c64 100644 --- a/llvm/test/CodeGen/Thumb2/mve-vctp.ll +++ b/llvm/test/CodeGen/Thumb2/mve-vctp.ll @@ -4,8 +4,8 @@ define void @vctp8(i32 %arg, <16 x i8> *%in, <16 x i8>* %out) { ; CHECK-LABEL: vctp8: ; CHECK: @ %bb.0: -; CHECK-NEXT: vctp.8 r0 ; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vctp.8 r0 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r2] @@ -20,8 +20,8 @@ define void @vctp8(i32 %arg, <16 x i8> *%in, <16 x i8>* %out) { define void @vctp16(i32 %arg, <8 x i16> *%in, <8 x i16>* %out) { ; CHECK-LABEL: vctp16: ; CHECK: @ %bb.0: -; CHECK-NEXT: vctp.16 r0 ; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vctp.16 r0 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r2] @@ -36,8 +36,8 @@ define void @vctp16(i32 %arg, <8 x i16> *%in, <8 x i16>* %out) { define void @vctp32(i32 %arg, <4 x i32> *%in, <4 x i32>* 
%out) { ; CHECK-LABEL: vctp32: ; CHECK: @ %bb.0: -; CHECK-NEXT: vctp.32 r0 ; CHECK-NEXT: vldrw.u32 q1, [r1] +; CHECK-NEXT: vctp.32 r0 ; CHECK-NEXT: vmov.i32 q0, #0x0 ; CHECK-NEXT: vpsel q0, q1, q0 ; CHECK-NEXT: vstrw.32 q0, [r2] From d12ec0f752e7f2c7f7252539da2d124264ec33f7 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Sat, 18 Jul 2020 22:22:41 +0200 Subject: [PATCH 746/771] [InstCombine] Fix store merge worklist management (PR46680) Fixes https://bugs.llvm.org/show_bug.cgi?id=46680. Just like insertions through IRBuilder, InsertNewInstBefore() should be using the deferred worklist mechanism, so that processing of newly added instructions is prioritized. There's one side-effect of the worklist order change which could be classified as a regression. An add op gets pushed through a select that at the time is not a umax. We could add a reverse transform that tries to push adds in the reverse direction to restore a min/max, but that seems like a sure way of getting infinite loops... Seems like something that should best wait on min/max intrinsics. 
Differential Revision: https://reviews.llvm.org/D84109 --- .../InstCombine/InstCombineInternal.h | 2 +- .../Transforms/InstCombine/minmax-fold.ll | 20 +++++++++---------- llvm/test/Transforms/InstCombine/pr46680.ll | 2 +- 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index f918dc7198ca9..ca51f37af4d93 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -653,7 +653,7 @@ class LLVM_LIBRARY_VISIBILITY InstCombiner "New instruction already inserted into a basic block!"); BasicBlock *BB = Old.getParent(); BB->getInstList().insert(Old.getIterator(), New); // Insert inst - Worklist.push(New); + Worklist.add(New); return New; } diff --git a/llvm/test/Transforms/InstCombine/minmax-fold.ll b/llvm/test/Transforms/InstCombine/minmax-fold.ll index 5ee38978ed782..dcf060c09613e 100644 --- a/llvm/test/Transforms/InstCombine/minmax-fold.ll +++ b/llvm/test/Transforms/InstCombine/minmax-fold.ll @@ -953,8 +953,8 @@ define i32 @add_umin(i32 %x) { define i32 @add_umin_constant_limit(i32 %x) { ; CHECK-LABEL: @add_umin_constant_limit( -; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 [[X:%.*]], 0 -; CHECK-NEXT: [[R:%.*]] = select i1 [[TMP1]], i32 41, i32 42 +; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i32 [[X:%.*]], 0 +; CHECK-NEXT: [[R:%.*]] = select i1 [[DOTNOT]], i32 41, i32 42 ; CHECK-NEXT: ret i32 [[R]] ; %a = add nuw i32 %x, 41 @@ -1165,8 +1165,8 @@ define <2 x i33> @add_umax_vec(<2 x i33> %x) { define i8 @PR14613_umin(i8 %x) { ; CHECK-LABEL: @PR14613_umin( -; CHECK-NEXT: [[U7:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X:%.*]], i8 15) -; CHECK-NEXT: ret i8 [[U7]] +; CHECK-NEXT: [[TMP1:%.*]] = call i8 @llvm.uadd.sat.i8(i8 [[X:%.*]], i8 15) +; CHECK-NEXT: ret i8 [[TMP1]] ; %u4 = zext i8 %x to i32 %u5 = add nuw nsw i32 %u4, 15 @@ -1179,8 +1179,8 @@ define i8 @PR14613_umin(i8 %x) { define i8 
@PR14613_umax(i8 %x) { ; CHECK-LABEL: @PR14613_umax( ; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i8 [[X:%.*]], -16 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 -16 -; CHECK-NEXT: [[U7:%.*]] = add nsw i8 [[TMP2]], 15 +; CHECK-NEXT: [[X_OP:%.*]] = add i8 [[X]], 15 +; CHECK-NEXT: [[U7:%.*]] = select i1 [[TMP1]], i8 [[X_OP]], i8 -1 ; CHECK-NEXT: ret i8 [[U7]] ; %u4 = zext i8 %x to i32 @@ -1422,8 +1422,8 @@ define <2 x i33> @add_smax_vec(<2 x i33> %x) { define i8 @PR14613_smin(i8 %x) { ; CHECK-LABEL: @PR14613_smin( ; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i8 [[X:%.*]], 40 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 40 -; CHECK-NEXT: [[U7:%.*]] = add nsw i8 [[TMP2]], 15 +; CHECK-NEXT: [[X_OP:%.*]] = add i8 [[X]], 15 +; CHECK-NEXT: [[U7:%.*]] = select i1 [[TMP1]], i8 [[X_OP]], i8 55 ; CHECK-NEXT: ret i8 [[U7]] ; %u4 = sext i8 %x to i32 @@ -1437,8 +1437,8 @@ define i8 @PR14613_smin(i8 %x) { define i8 @PR14613_smax(i8 %x) { ; CHECK-LABEL: @PR14613_smax( ; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i8 [[X:%.*]], 40 -; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i8 [[X]], i8 40 -; CHECK-NEXT: [[U7:%.*]] = add nuw i8 [[TMP2]], 15 +; CHECK-NEXT: [[X_OP:%.*]] = add i8 [[X]], 15 +; CHECK-NEXT: [[U7:%.*]] = select i1 [[TMP1]], i8 [[X_OP]], i8 55 ; CHECK-NEXT: ret i8 [[U7]] ; %u4 = sext i8 %x to i32 diff --git a/llvm/test/Transforms/InstCombine/pr46680.ll b/llvm/test/Transforms/InstCombine/pr46680.ll index 90ea2e110afe4..59d449d5dc23a 100644 --- a/llvm/test/Transforms/InstCombine/pr46680.ll +++ b/llvm/test/Transforms/InstCombine/pr46680.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt -S -instcombine -instcombine-infinite-loop-threshold=3 < %s | FileCheck %s +; RUN: opt -S -instcombine -instcombine-infinite-loop-threshold=2 < %s | FileCheck %s target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-pc-linux-gnu" From 
7393d7574c0911b2fd88dd10d093af3efe4dd0cf Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 19 Jul 2020 09:24:03 -0400 Subject: [PATCH 747/771] [InstSimplify] fold fcmp with infinity constant using isKnownNeverInfinity This is a step towards trying to remove unnecessary FP compares with infinity when compiling with -ffinite-math-only or similar. I'm intentionally not checking FMF on the fcmp itself because I'm assuming that will go away eventually. The analysis part of this was added with rGcd481136 for use with isKnownNeverNaN. Similarly, that could be an enhancement here to get predicates like 'one' and 'ueq'. Differential Revision: https://reviews.llvm.org/D84035 --- llvm/lib/Analysis/InstructionSimplify.cpp | 7 +++++++ .../InstSimplify/floating-point-compare.ll | 21 +++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index d3bdf9d6aafd0..8fbcee84a1567 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -3703,6 +3703,13 @@ static Value *SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; } } + + // LHS == Inf + if (Pred == FCmpInst::FCMP_OEQ && isKnownNeverInfinity(LHS, Q.TLI)) + return getFalse(RetTy); + // LHS != Inf + if (Pred == FCmpInst::FCMP_UNE && isKnownNeverInfinity(LHS, Q.TLI)) + return getTrue(RetTy); } if (C->isNegative() && !C->isNegZero()) { assert(!C->isNaN() && "Unexpected NaN constant!"); diff --git a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll index 4f7334762f536..718a4427e15e3 100644 --- a/llvm/test/Transforms/InstSimplify/floating-point-compare.ll +++ b/llvm/test/Transforms/InstSimplify/floating-point-compare.ll @@ -1041,9 +1041,7 @@ define <2 x i1> @unorderedCompareWithNaNVector_undef_elt(<2 x double> %A) { define i1 @is_infinite(float %x) { ; CHECK-LABEL: @is_infinite( -; 
CHECK-NEXT: [[XABS:%.*]] = call ninf float @llvm.fabs.f32(float [[X:%.*]]) -; CHECK-NEXT: [[R:%.*]] = fcmp oeq float [[XABS]], 0x7FF0000000000000 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 false ; %xabs = call ninf float @llvm.fabs.f32(float %x) %r = fcmp oeq float %xabs, 0x7FF0000000000000 @@ -1052,15 +1050,15 @@ define i1 @is_infinite(float %x) { define <2 x i1> @is_infinite_neg(<2 x float> %x) { ; CHECK-LABEL: @is_infinite_neg( -; CHECK-NEXT: [[X42:%.*]] = fadd ninf <2 x float> [[X:%.*]], -; CHECK-NEXT: [[R:%.*]] = fcmp oeq <2 x float> [[X42]], -; CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: ret <2 x i1> zeroinitializer ; %x42 = fadd ninf <2 x float> %x, %r = fcmp oeq <2 x float> %x42, ret <2 x i1> %r } +; Negative test - but this could be reduced to 'uno' outside of instsimplify. + define i1 @is_infinite_or_nan(float %x) { ; CHECK-LABEL: @is_infinite_or_nan( ; CHECK-NEXT: [[X42:%.*]] = fadd ninf float [[X:%.*]], 4.200000e+01 @@ -1074,10 +1072,7 @@ define i1 @is_infinite_or_nan(float %x) { define i1 @is_finite_or_nan(i1 %c, double %x) { ; CHECK-LABEL: @is_finite_or_nan( -; CHECK-NEXT: [[XX:%.*]] = fmul ninf double [[X:%.*]], [[X]] -; CHECK-NEXT: [[S:%.*]] = select i1 [[C:%.*]], double 4.200000e+01, double [[XX]] -; CHECK-NEXT: [[R:%.*]] = fcmp une double [[S]], 0x7FF0000000000000 -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; %xx = fmul ninf double %x, %x %s = select i1 %c, double 42.0, double %xx @@ -1087,15 +1082,15 @@ define i1 @is_finite_or_nan(i1 %c, double %x) { define <2 x i1> @is_finite_or_nan_commute(<2 x i8> %x) { ; CHECK-LABEL: @is_finite_or_nan_commute( -; CHECK-NEXT: [[CAST:%.*]] = uitofp <2 x i8> [[X:%.*]] to <2 x float> -; CHECK-NEXT: [[R:%.*]] = fcmp une <2 x float> , [[CAST]] -; CHECK-NEXT: ret <2 x i1> [[R]] +; CHECK-NEXT: ret <2 x i1> ; %cast = uitofp <2 x i8> %x to <2 x float> %r = fcmp une <2 x float> , %cast ret <2 x i1> %r } +; Negative test - but this could be reduced to 'ord' outside of instsimplify. 
+ define i1 @is_finite_and_ordered(double %x) { ; CHECK-LABEL: @is_finite_and_ordered( ; CHECK-NEXT: [[XX:%.*]] = fmul ninf double [[X:%.*]], [[X]] From fb5577d4f883ba21a6fe048ffd59ca3659cdb491 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sun, 19 Jul 2020 16:09:46 +0300 Subject: [PATCH 748/771] [NFCI][GVN] Make IsValueFullyAvailableInBlock() readable - use enum class instead of magic numbers This does not change any logic, it only wraps the magic 0/1/2/3 constants into an enum class. --- llvm/lib/Transforms/Scalar/GVN.cpp | 65 +++++++++++++++++++----------- 1 file changed, 41 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Transforms/Scalar/GVN.cpp b/llvm/lib/Transforms/Scalar/GVN.cpp index b16f8591b5a46..0b416cc4afb86 100644 --- a/llvm/lib/Transforms/Scalar/GVN.cpp +++ b/llvm/lib/Transforms/Scalar/GVN.cpp @@ -667,6 +667,19 @@ LLVM_DUMP_METHOD void GVN::dump(DenseMap& d) const { } #endif +enum class AvaliabilityState : char { + /// We know the block *is not* fully available. This is a fixpoint. + Unavaliable = 0, + /// We know the block *is* fully available. This is a fixpoint. + Avaliable = 1, + /// We do not know whether the block is fully available or not, + /// but we are currently speculating that it will be. + SpeculativelyAvaliable = 2, + /// We are speculating for this block and have used that + /// to speculate for other blocks. + SpeculativelyAvaliableAndUsedForSpeculation = 3, +}; + /// Return true if we can prove that the value /// we're analyzing is fully available in the specified block. As we go, keep /// track of which blocks we know are fully alive in FullyAvailableBlocks. This @@ -677,24 +690,27 @@ LLVM_DUMP_METHOD void GVN::dump(DenseMap& d) const { /// currently speculating that it will be. /// 3) we are speculating for this block and have used that to speculate for /// other blocks. 
-static bool IsValueFullyAvailableInBlock(BasicBlock *BB, - DenseMap &FullyAvailableBlocks, - uint32_t RecurseDepth) { +static bool IsValueFullyAvailableInBlock( + BasicBlock *BB, + DenseMap &FullyAvailableBlocks, + uint32_t RecurseDepth) { if (RecurseDepth > MaxRecurseDepth) return false; - // Optimistically assume that the block is fully available and check to see - // if we already know about this block in one lookup. - std::pair::iterator, bool> IV = - FullyAvailableBlocks.insert(std::make_pair(BB, 2)); + // Optimistically assume that the block is speculatively available and check + // to see if we already know about this block in one lookup. + std::pair::iterator, bool> IV = + FullyAvailableBlocks.insert( + std::make_pair(BB, AvaliabilityState::SpeculativelyAvaliable)); // If the entry already existed for this block, return the precomputed value. if (!IV.second) { // If this is a speculative "available" value, mark it as being used for // speculation of other blocks. - if (IV.first->second == 2) - IV.first->second = 3; - return IV.first->second != 0; + if (IV.first->second == AvaliabilityState::SpeculativelyAvaliable) + IV.first->second = + AvaliabilityState::SpeculativelyAvaliableAndUsedForSpeculation; + return IV.first->second != AvaliabilityState::Unavaliable; } // Otherwise, see if it is fully available in all predecessors. @@ -717,29 +733,30 @@ static bool IsValueFullyAvailableInBlock(BasicBlock *BB, // all, a fully-available block. We have a problem if we speculated on this and // used the speculation to mark other blocks as available. SpeculationFailure: - char &BBVal = FullyAvailableBlocks[BB]; + AvaliabilityState &BBVal = FullyAvailableBlocks[BB]; - // If we didn't speculate on this, just return with it set to false. - if (BBVal == 2) { - BBVal = 0; + // If we didn't speculate on this, just return with it set to unavaliable. 
+ if (BBVal == AvaliabilityState::SpeculativelyAvaliable) { + BBVal = AvaliabilityState::Unavaliable; return false; } - // If we did speculate on this value, we could have blocks set to 1 that are - // incorrect. Walk the (transitive) successors of this block and mark them as - // 0 if set to one. + // If we did speculate on this value, we could have blocks set to + // speculatively avaliable that are incorrect. Walk the (transitive) + // successors of this block and mark them as unavaliable instead. SmallVector BBWorklist; BBWorklist.push_back(BB); do { BasicBlock *Entry = BBWorklist.pop_back_val(); - // Note that this sets blocks to 0 (unavailable) if they happen to not + // Note that this sets blocks to unavailable if they happen to not // already be in FullyAvailableBlocks. This is safe. - char &EntryVal = FullyAvailableBlocks[Entry]; - if (EntryVal == 0) continue; // Already unavailable. + AvaliabilityState &EntryVal = FullyAvailableBlocks[Entry]; + if (EntryVal == AvaliabilityState::Unavaliable) + continue; // Already unavailable. // Mark as unavailable. - EntryVal = 0; + EntryVal = AvaliabilityState::Unavaliable; BBWorklist.append(succ_begin(Entry), succ_end(Entry)); } while (!BBWorklist.empty()); @@ -1107,11 +1124,11 @@ bool GVN::PerformLoadPRE(LoadInst *LI, AvailValInBlkVect &ValuesPerBlock, // Check to see how many predecessors have the loaded value fully // available. 
MapVector PredLoads; - DenseMap FullyAvailableBlocks; + DenseMap FullyAvailableBlocks; for (const AvailableValueInBlock &AV : ValuesPerBlock) - FullyAvailableBlocks[AV.BB] = true; + FullyAvailableBlocks[AV.BB] = AvaliabilityState::Avaliable; for (BasicBlock *UnavailableBB : UnavailableBlocks) - FullyAvailableBlocks[UnavailableBB] = false; + FullyAvailableBlocks[UnavailableBB] = AvaliabilityState::Unavaliable; SmallVector CriticalEdgePred; for (BasicBlock *Pred : predecessors(LoadBB)) { From c6e13667e787b3a72b794422ab506d5403ddcd21 Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Thu, 9 Jul 2020 22:52:50 +0200 Subject: [PATCH 749/771] [PredicateInfo] Add a method to interpret predicate as cmp constraint Both users of predicteinfo (NewGVN and SCCP) are interested in getting a cmp constraint on the predicated value. They currently implement separate logic for this. This patch adds a common method for this in PredicateBase. This enables a missing bit of PredicateInfo handling in SCCP: Now the predicate on the condition itself is also used. For switches it means we know that the switched-on value is the same as the case value. For assumes/branches we know that the condition is true or false. 
Differential Revision: https://reviews.llvm.org/D83640 --- .../llvm/Transforms/Utils/PredicateInfo.h | 10 ++ llvm/lib/Transforms/Scalar/NewGVN.cpp | 97 +++++-------------- llvm/lib/Transforms/Scalar/SCCP.cpp | 53 ++-------- llvm/lib/Transforms/Utils/PredicateInfo.cpp | 47 +++++++++ .../Transforms/SCCP/predicateinfo-cond.ll | 23 ++--- 5 files changed, 99 insertions(+), 131 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h index cdac4142555db..c922476ac79da 100644 --- a/llvm/include/llvm/Transforms/Utils/PredicateInfo.h +++ b/llvm/include/llvm/Transforms/Utils/PredicateInfo.h @@ -70,6 +70,13 @@ class raw_ostream; enum PredicateType { PT_Branch, PT_Assume, PT_Switch }; +/// Constraint for a predicate of the form "cmp Pred Op, OtherOp", where Op +/// is the value the constraint applies to (the ssa.copy result). +struct PredicateConstraint { + CmpInst::Predicate Predicate; + Value *OtherOp; +}; + // Base class for all predicate information we provide. // All of our predicate information has at least a comparison. class PredicateBase : public ilist_node { @@ -95,6 +102,9 @@ class PredicateBase : public ilist_node { PB->Type == PT_Switch; } + /// Fetch condition in the form of PredicateConstraint, if possible. 
+ Optional getConstraint() const; + protected: PredicateBase(PredicateType PT, Value *Op, Value *Condition) : Type(PT), OriginalOp(Op), Condition(Condition) {} diff --git a/llvm/lib/Transforms/Scalar/NewGVN.cpp b/llvm/lib/Transforms/Scalar/NewGVN.cpp index 45d01cc1b5845..cfadfbb585b9f 100644 --- a/llvm/lib/Transforms/Scalar/NewGVN.cpp +++ b/llvm/lib/Transforms/Scalar/NewGVN.cpp @@ -1539,86 +1539,39 @@ NewGVN::performSymbolicPredicateInfoEvaluation(Instruction *I) const { LLVM_DEBUG(dbgs() << "Found predicate info from instruction !\n"); - auto *CopyOf = I->getOperand(0); - auto *Cond = PI->Condition; - - // If this a copy of the condition, it must be either true or false depending - // on the predicate info type and edge. - if (CopyOf == Cond) { - // We should not need to add predicate users because the predicate info is - // already a use of this operand. - if (isa(PI)) - return createConstantExpression(ConstantInt::getTrue(Cond->getType())); - if (auto *PBranch = dyn_cast(PI)) { - if (PBranch->TrueEdge) - return createConstantExpression(ConstantInt::getTrue(Cond->getType())); - return createConstantExpression(ConstantInt::getFalse(Cond->getType())); - } - if (auto *PSwitch = dyn_cast(PI)) - return createConstantExpression(cast(PSwitch->CaseValue)); - } - - // Not a copy of the condition, so see what the predicates tell us about this - // value. First, though, we check to make sure the value is actually a copy - // of one of the condition operands. It's possible, in certain cases, for it - // to be a copy of a predicateinfo copy. In particular, if two branch - // operations use the same condition, and one branch dominates the other, we - // will end up with a copy of a copy. This is currently a small deficiency in - // predicateinfo. What will end up happening here is that we will value - // number both copies the same anyway. - - // Everything below relies on the condition being a comparison. 
- auto *Cmp = dyn_cast(Cond); - if (!Cmp) + const Optional &Constraint = PI->getConstraint(); + if (!Constraint) return nullptr; - if (CopyOf != Cmp->getOperand(0) && CopyOf != Cmp->getOperand(1)) { - LLVM_DEBUG(dbgs() << "Copy is not of any condition operands!\n"); - return nullptr; - } - Value *FirstOp = lookupOperandLeader(Cmp->getOperand(0)); - Value *SecondOp = lookupOperandLeader(Cmp->getOperand(1)); - bool SwappedOps = false; + CmpInst::Predicate Predicate = Constraint->Predicate; + Value *CmpOp0 = I->getOperand(0); + Value *CmpOp1 = Constraint->OtherOp; + + Value *FirstOp = lookupOperandLeader(CmpOp0); + Value *SecondOp = lookupOperandLeader(CmpOp1); + Value *AdditionallyUsedValue = CmpOp0; + // Sort the ops. if (shouldSwapOperands(FirstOp, SecondOp)) { std::swap(FirstOp, SecondOp); - SwappedOps = true; + Predicate = CmpInst::getSwappedPredicate(Predicate); + AdditionallyUsedValue = CmpOp1; } - CmpInst::Predicate Predicate = - SwappedOps ? Cmp->getSwappedPredicate() : Cmp->getPredicate(); - - if (isa(PI)) { - // If we assume the operands are equal, then they are equal. - if (Predicate == CmpInst::ICMP_EQ) { - addPredicateUsers(PI, I); - addAdditionalUsers(SwappedOps ? Cmp->getOperand(1) : Cmp->getOperand(0), - I); - return createVariableOrConstant(FirstOp); - } + + if (Predicate == CmpInst::ICMP_EQ) { + addPredicateUsers(PI, I); + addAdditionalUsers(AdditionallyUsedValue, I); + return createVariableOrConstant(FirstOp); } - if (const auto *PBranch = dyn_cast(PI)) { - // If we are *not* a copy of the comparison, we may equal to the other - // operand when the predicate implies something about equality of - // operations. In particular, if the comparison is true/false when the - // operands are equal, and we are on the right edge, we know this operation - // is equal to something. - if ((PBranch->TrueEdge && Predicate == CmpInst::ICMP_EQ) || - (!PBranch->TrueEdge && Predicate == CmpInst::ICMP_NE)) { - addPredicateUsers(PI, I); - addAdditionalUsers(SwappedOps ? 
Cmp->getOperand(1) : Cmp->getOperand(0), - I); - return createVariableOrConstant(FirstOp); - } - // Handle the special case of floating point. - if (((PBranch->TrueEdge && Predicate == CmpInst::FCMP_OEQ) || - (!PBranch->TrueEdge && Predicate == CmpInst::FCMP_UNE)) && - isa(FirstOp) && !cast(FirstOp)->isZero()) { - addPredicateUsers(PI, I); - addAdditionalUsers(SwappedOps ? Cmp->getOperand(1) : Cmp->getOperand(0), - I); - return createConstantExpression(cast(FirstOp)); - } + + // Handle the special case of floating point. + if (Predicate == CmpInst::FCMP_OEQ && isa(FirstOp) && + !cast(FirstOp)->isZero()) { + addPredicateUsers(PI, I); + addAdditionalUsers(AdditionallyUsedValue, I); + return createConstantExpression(cast(FirstOp)); } + return nullptr; } diff --git a/llvm/lib/Transforms/Scalar/SCCP.cpp b/llvm/lib/Transforms/Scalar/SCCP.cpp index 2a5fcfc092685..11ac7d7e15847 100644 --- a/llvm/lib/Transforms/Scalar/SCCP.cpp +++ b/llvm/lib/Transforms/Scalar/SCCP.cpp @@ -1262,55 +1262,22 @@ void SCCPSolver::handleCallResult(CallBase &CB) { auto *PI = getPredicateInfoFor(&CB); assert(PI && "Missing predicate info for ssa.copy"); - CmpInst *Cmp; - bool TrueEdge; - if (auto *PBranch = dyn_cast(PI)) { - Cmp = dyn_cast(PBranch->Condition); - TrueEdge = PBranch->TrueEdge; - } else if (auto *PAssume = dyn_cast(PI)) { - Cmp = dyn_cast(PAssume->Condition); - TrueEdge = true; - } else { + const Optional &Constraint = PI->getConstraint(); + if (!Constraint) { mergeInValue(ValueState[&CB], &CB, CopyOfVal); return; } - // Everything below relies on the condition being a comparison. - if (!Cmp) { - mergeInValue(ValueState[&CB], &CB, CopyOfVal); - return; - } + CmpInst::Predicate Pred = Constraint->Predicate; + Value *OtherOp = Constraint->OtherOp; - Value *RenamedOp = PI->RenamedOp; - Value *CmpOp0 = Cmp->getOperand(0); - Value *CmpOp1 = Cmp->getOperand(1); - // Bail out if neither of the operands matches RenamedOp. 
- if (CmpOp0 != RenamedOp && CmpOp1 != RenamedOp) { - mergeInValue(ValueState[&CB], &CB, getValueState(CopyOf)); + // Wait until OtherOp is resolved. + if (getValueState(OtherOp).isUnknown()) { + addAdditionalUser(OtherOp, &CB); return; } - auto Pred = Cmp->getPredicate(); - if (CmpOp1 == RenamedOp) { - std::swap(CmpOp0, CmpOp1); - Pred = Cmp->getSwappedPredicate(); - } - - // Wait until CmpOp1 is resolved. - if (getValueState(CmpOp1).isUnknown()) { - addAdditionalUser(CmpOp1, &CB); - return; - } - - // The code below relies on PredicateInfo only inserting copies for the - // true branch when the branch condition is an AND and only inserting - // copies for the false branch when the branch condition is an OR. This - // ensures we can intersect the range from the condition with the range of - // CopyOf. - if (!TrueEdge) - Pred = CmpInst::getInversePredicate(Pred); - - ValueLatticeElement CondVal = getValueState(CmpOp1); + ValueLatticeElement CondVal = getValueState(OtherOp); ValueLatticeElement &IV = ValueState[&CB]; if (CondVal.isConstantRange() || CopyOfVal.isConstantRange()) { auto ImposedCR = @@ -1334,7 +1301,7 @@ void SCCPSolver::handleCallResult(CallBase &CB) { if (!CopyOfCR.contains(NewCR) && CopyOfCR.getSingleMissingElement()) NewCR = CopyOfCR; - addAdditionalUser(CmpOp1, &CB); + addAdditionalUser(OtherOp, &CB); // TODO: Actually filp MayIncludeUndef for the created range to false, // once most places in the optimizer respect the branches on // undef/poison are UB rule. The reason why the new range cannot be @@ -1351,7 +1318,7 @@ void SCCPSolver::handleCallResult(CallBase &CB) { } else if (Pred == CmpInst::ICMP_EQ && CondVal.isConstant()) { // For non-integer values or integer constant expressions, only // propagate equal constants. 
- addAdditionalUser(CmpOp1, &CB); + addAdditionalUser(OtherOp, &CB); mergeInValue(IV, &CB, CondVal); return; } diff --git a/llvm/lib/Transforms/Utils/PredicateInfo.cpp b/llvm/lib/Transforms/Utils/PredicateInfo.cpp index 99b64a7462f62..280d3a996d508 100644 --- a/llvm/lib/Transforms/Utils/PredicateInfo.cpp +++ b/llvm/lib/Transforms/Utils/PredicateInfo.cpp @@ -822,6 +822,53 @@ PredicateInfo::~PredicateInfo() { } } +Optional PredicateBase::getConstraint() const { + switch (Type) { + case PT_Assume: + case PT_Branch: { + bool TrueEdge = true; + if (auto *PBranch = dyn_cast(this)) + TrueEdge = PBranch->TrueEdge; + + if (Condition == RenamedOp) { + return {{CmpInst::ICMP_EQ, + TrueEdge ? ConstantInt::getTrue(Condition->getType()) + : ConstantInt::getFalse(Condition->getType())}}; + } + + CmpInst *Cmp = dyn_cast(Condition); + assert(Cmp && "Condition should be a CmpInst"); + + CmpInst::Predicate Pred; + Value *OtherOp; + if (Cmp->getOperand(0) == RenamedOp) { + Pred = Cmp->getPredicate(); + OtherOp = Cmp->getOperand(1); + } else if (Cmp->getOperand(1) == RenamedOp) { + Pred = Cmp->getSwappedPredicate(); + OtherOp = Cmp->getOperand(0); + } else { + // TODO: Make this an assertion once RenamedOp is fully accurate. + return None; + } + + // Invert predicate along false edge. + if (!TrueEdge) + Pred = CmpInst::getInversePredicate(Pred); + + return {{Pred, OtherOp}}; + } + case PT_Switch: + if (Condition != RenamedOp) { + // TODO: Make this an assertion once RenamedOp is fully accurate. 
+ return None; + } + + return {{CmpInst::ICMP_EQ, cast(this)->CaseValue}}; + } + llvm_unreachable("Unknown predicate type"); +} + void PredicateInfo::verifyPredicateInfo() const {} char PredicateInfoPrinterLegacyPass::ID = 0; diff --git a/llvm/test/Transforms/SCCP/predicateinfo-cond.ll b/llvm/test/Transforms/SCCP/predicateinfo-cond.ll index d8528918babed..d98b3cc76d928 100644 --- a/llvm/test/Transforms/SCCP/predicateinfo-cond.ll +++ b/llvm/test/Transforms/SCCP/predicateinfo-cond.ll @@ -11,16 +11,13 @@ define i32 @switch(i32 %x) { ; CHECK-NEXT: i32 2, label [[CASE_2:%.*]] ; CHECK-NEXT: ] ; CHECK: case.0: -; CHECK-NEXT: [[ADD:%.*]] = add i32 [[X]], 1 ; CHECK-NEXT: br label [[END:%.*]] ; CHECK: case.2: -; CHECK-NEXT: [[SUB:%.*]] = sub i32 [[X]], 1 ; CHECK-NEXT: br label [[END]] ; CHECK: case.default: ; CHECK-NEXT: br label [[END]] ; CHECK: end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ [[ADD]], [[CASE_0]] ], [ [[SUB]], [[CASE_2]] ], [ 1, [[CASE_DEFAULT]] ] -; CHECK-NEXT: ret i32 [[PHI]] +; CHECK-NEXT: ret i32 1 ; switch i32 %x, label %case.default [ i32 0, label %case.0 @@ -47,7 +44,7 @@ define i1 @assume(i32 %x) { ; CHECK-LABEL: @assume( ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: ret i1 [[CMP]] +; CHECK-NEXT: ret i1 true ; %cmp = icmp sge i32 %x, 0 call void @llvm.assume(i1 %cmp) @@ -59,23 +56,17 @@ define i32 @branch(i32 %x) { ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[X:%.*]], 0 ; CHECK-NEXT: br i1 [[CMP]], label [[IF_THEN1:%.*]], label [[IF_THEN2:%.*]] ; CHECK: if.then1: -; CHECK-NEXT: br i1 [[CMP]], label [[IF2_THEN1:%.*]], label [[IF2_THEN2:%.*]] +; CHECK-NEXT: br label [[IF2_THEN1:%.*]] ; CHECK: if2.then1: ; CHECK-NEXT: br label [[IF2_END:%.*]] -; CHECK: if2.then2: -; CHECK-NEXT: br label [[IF2_END]] ; CHECK: if2.end: -; CHECK-NEXT: [[PHI:%.*]] = phi i32 [ 0, [[IF2_THEN1]] ], [ 1, [[IF2_THEN2]] ] -; CHECK-NEXT: ret i32 [[PHI]] +; CHECK-NEXT: ret i32 0 ; CHECK: if.then2: -; CHECK-NEXT: br i1 
[[CMP]], label [[IF3_THEN1:%.*]], label [[IF3_THEN2:%.*]] -; CHECK: if3.then1: -; CHECK-NEXT: br label [[IF3_END:%.*]] +; CHECK-NEXT: br label [[IF3_THEN2:%.*]] ; CHECK: if3.then2: -; CHECK-NEXT: br label [[IF3_END]] +; CHECK-NEXT: br label [[IF3_END:%.*]] ; CHECK: if3.end: -; CHECK-NEXT: [[PHI2:%.*]] = phi i32 [ 0, [[IF3_THEN1]] ], [ 1, [[IF3_THEN2]] ] -; CHECK-NEXT: ret i32 [[PHI2]] +; CHECK-NEXT: ret i32 1 ; %cmp = icmp sge i32 %x, 0 br i1 %cmp, label %if.then1, label %if.then2 From 2f3862eb9f21e8a0d48505637fefe6e5e295c18c Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Sun, 19 Jul 2020 16:37:03 +0300 Subject: [PATCH 750/771] Reland "[InstCombine] Lower infinite combine loop detection thresholds" This reverts commit 4500db8c59621a31c622862a2946457fdee481ce, which was reverted because lower thresholds exposed a new issue (PR46680). Now that it was resolved by d12ec0f752e7f2c7f7252539da2d124264ec33f7, we can reinstate lower limits and wait for a new bugreport before reverting this again... --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index b3254c10a0b2b..8eac8637cb9e7 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -123,8 +123,13 @@ STATISTIC(NumReassoc , "Number of reassociations"); DEBUG_COUNTER(VisitCounter, "instcombine-visit", "Controls which instructions are visited"); +// FIXME: these limits eventually should be as low as 2. 
static constexpr unsigned InstCombineDefaultMaxIterations = 1000; +#ifndef NDEBUG +static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 100; +#else static constexpr unsigned InstCombineDefaultInfiniteLoopThreshold = 1000; +#endif static cl::opt EnableCodeSinking("instcombine-code-sinking", cl::desc("Enable code sinking"), From 50afa18772daca0b6de253a7c5311c81b0a46682 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sun, 19 Jul 2020 10:03:55 -0400 Subject: [PATCH 751/771] [x86] split FMA with fast-math-flags to avoid libcall fma reassoc A, B, C --> fadd (fmul A, B), C (when target has no FMA hardware) C/C++ code may use explicit fma() calls (which become LLVM fma intrinsics in IR) but then gets compiled with -ffast-math or similar. For targets that do not have FMA hardware, we don't want to go out to the math library for a precise but slow FMA result. I tried this as a generic DAGCombine, but it caused infinite looping on more than 1 other target, so there's likely some over-reaching fma formation happening. There's also a potential intersection of strict FP with fast-math here. Deferring to current behavior for that case (assuming that strict-ness overrides fast-ness). Differential Revision: https://reviews.llvm.org/D83981 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 17 +++-- llvm/test/CodeGen/X86/fma.ll | 92 ++++++------------------- 2 files changed, 33 insertions(+), 76 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ea4b4734225d4..bb32a17bcc122 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -46131,14 +46131,23 @@ static SDValue combineFMA(SDNode *N, SelectionDAG &DAG, if (!TLI.isTypeLegal(VT)) return SDValue(); - EVT ScalarVT = VT.getScalarType(); - if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA()) - return SDValue(); - SDValue A = N->getOperand(IsStrict ? 
1 : 0); SDValue B = N->getOperand(IsStrict ? 2 : 1); SDValue C = N->getOperand(IsStrict ? 3 : 2); + // If the operation allows fast-math and the target does not support FMA, + // split this into mul+add to avoid libcall(s). + SDNodeFlags Flags = N->getFlags(); + if (!IsStrict && Flags.hasAllowReassociation() && + TLI.isOperationExpand(ISD::FMA, VT)) { + SDValue Fmul = DAG.getNode(ISD::FMUL, dl, VT, A, B, Flags); + return DAG.getNode(ISD::FADD, dl, VT, Fmul, C, Flags); + } + + EVT ScalarVT = VT.getScalarType(); + if ((ScalarVT != MVT::f32 && ScalarVT != MVT::f64) || !Subtarget.hasAnyFMA()) + return SDValue(); + auto invertIfNegative = [&DAG, &TLI, &DCI](SDValue &V) { bool CodeSize = DAG.getMachineFunction().getFunction().hasOptSize(); bool LegalOperations = !DCI.isBeforeLegalizeOps(); diff --git a/llvm/test/CodeGen/X86/fma.ll b/llvm/test/CodeGen/X86/fma.ll index 01b80c2dfdec1..91ba1c8891409 100644 --- a/llvm/test/CodeGen/X86/fma.ll +++ b/llvm/test/CodeGen/X86/fma.ll @@ -73,9 +73,15 @@ define float @test_f32_reassoc(float %a, float %b, float %c) #0 { ; ; FMACALL32-LABEL: test_f32_reassoc: ; FMACALL32: ## %bb.0: -; FMACALL32-NEXT: jmp _fmaf ## TAILCALL -; FMACALL32-NEXT: ## encoding: [0xeb,A] -; FMACALL32-NEXT: ## fixup A - offset: 1, value: _fmaf-1, kind: FK_PCRel_1 +; FMACALL32-NEXT: pushl %eax ## encoding: [0x50] +; FMACALL32-NEXT: vmovss {{[0-9]+}}(%esp), %xmm0 ## encoding: [0xc5,0xfa,0x10,0x44,0x24,0x08] +; FMACALL32-NEXT: ## xmm0 = mem[0],zero,zero,zero +; FMACALL32-NEXT: vmulss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x59,0x44,0x24,0x0c] +; FMACALL32-NEXT: vaddss {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xfa,0x58,0x44,0x24,0x10] +; FMACALL32-NEXT: vmovss %xmm0, (%esp) ## encoding: [0xc5,0xfa,0x11,0x04,0x24] +; FMACALL32-NEXT: flds (%esp) ## encoding: [0xd9,0x04,0x24] +; FMACALL32-NEXT: popl %eax ## encoding: [0x58] +; FMACALL32-NEXT: retl ## encoding: [0xc3] ; ; FMA64-LABEL: test_f32_reassoc: ; FMA64: ## %bb.0: @@ -85,9 +91,9 @@ define 
float @test_f32_reassoc(float %a, float %b, float %c) #0 { ; ; FMACALL64-LABEL: test_f32_reassoc: ; FMACALL64: ## %bb.0: -; FMACALL64-NEXT: jmp _fmaf ## TAILCALL -; FMACALL64-NEXT: ## encoding: [0xeb,A] -; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fmaf-1, kind: FK_PCRel_1 +; FMACALL64-NEXT: mulss %xmm1, %xmm0 ## encoding: [0xf3,0x0f,0x59,0xc1] +; FMACALL64-NEXT: addss %xmm2, %xmm0 ## encoding: [0xf3,0x0f,0x58,0xc2] +; FMACALL64-NEXT: retq ## encoding: [0xc3] ; ; AVX512-LABEL: test_f32_reassoc: ; AVX512: ## %bb.0: @@ -1523,6 +1529,12 @@ define <2 x double> @test_v2f64_reassoc(<2 x double> %a, <2 x double> %b, <2 x d ; FMA32-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; FMA32-NEXT: retl ## encoding: [0xc3] ; +; FMACALL32-LABEL: test_v2f64_reassoc: +; FMACALL32: ## %bb.0: +; FMACALL32-NEXT: vmulpd %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x59,0xc1] +; FMACALL32-NEXT: vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0x58,0xc2] +; FMACALL32-NEXT: retl ## encoding: [0xc3] +; ; FMA64-LABEL: test_v2f64_reassoc: ; FMA64: ## %bb.0: ; FMA64-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] @@ -1531,37 +1543,8 @@ define <2 x double> @test_v2f64_reassoc(<2 x double> %a, <2 x double> %b, <2 x d ; ; FMACALL64-LABEL: test_v2f64_reassoc: ; FMACALL64: ## %bb.0: -; FMACALL64-NEXT: subq $72, %rsp ## encoding: [0x48,0x83,0xec,0x48] -; FMACALL64-NEXT: movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill -; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x54,0x24,0x20] -; FMACALL64-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill -; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x4c,0x24,0x10] -; FMACALL64-NEXT: movaps %xmm0, (%rsp) ## 16-byte Spill -; FMACALL64-NEXT: ## encoding: [0x0f,0x29,0x04,0x24] -; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel -; FMACALL64-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 16-byte Spill -; FMACALL64-NEXT: ## encoding: 
[0x0f,0x29,0x44,0x24,0x30] -; FMACALL64-NEXT: movaps (%rsp), %xmm0 ## 16-byte Reload -; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x04,0x24] -; FMACALL64-NEXT: movhlps %xmm0, %xmm0 ## encoding: [0x0f,0x12,0xc0] -; FMACALL64-NEXT: ## xmm0 = xmm0[1,1] -; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload -; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x10] -; FMACALL64-NEXT: movhlps %xmm1, %xmm1 ## encoding: [0x0f,0x12,0xc9] -; FMACALL64-NEXT: ## xmm1 = xmm1[1,1] -; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 ## 16-byte Reload -; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x54,0x24,0x20] -; FMACALL64-NEXT: movhlps %xmm2, %xmm2 ## encoding: [0x0f,0x12,0xd2] -; FMACALL64-NEXT: ## xmm2 = xmm2[1,1] -; FMACALL64-NEXT: callq _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL64-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: reloc_branch_4byte_pcrel -; FMACALL64-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 ## 16-byte Reload -; FMACALL64-NEXT: ## encoding: [0x0f,0x28,0x4c,0x24,0x30] -; FMACALL64-NEXT: movlhps %xmm0, %xmm1 ## encoding: [0x0f,0x16,0xc8] -; FMACALL64-NEXT: ## xmm1 = xmm1[0],xmm0[0] -; FMACALL64-NEXT: movaps %xmm1, %xmm0 ## encoding: [0x0f,0x28,0xc1] -; FMACALL64-NEXT: addq $72, %rsp ## encoding: [0x48,0x83,0xc4,0x48] +; FMACALL64-NEXT: mulpd %xmm1, %xmm0 ## encoding: [0x66,0x0f,0x59,0xc1] +; FMACALL64-NEXT: addpd %xmm2, %xmm0 ## encoding: [0x66,0x0f,0x58,0xc2] ; FMACALL64-NEXT: retq ## encoding: [0xc3] ; ; AVX512-LABEL: test_v2f64_reassoc: @@ -1575,41 +1558,6 @@ define <2 x double> @test_v2f64_reassoc(<2 x double> %a, <2 x double> %b, <2 x d ; AVX512VL-NEXT: vfmadd213pd %xmm2, %xmm1, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2] ; AVX512VL-NEXT: ## xmm0 = (xmm1 * xmm0) + xmm2 ; AVX512VL-NEXT: retq ## encoding: [0xc3] -; -; FMACALL32_BDVER2-LABEL: test_v2f64_reassoc: -; FMACALL32_BDVER2: ## %bb.0: -; FMACALL32_BDVER2-NEXT: subl $108, %esp ## encoding: [0x83,0xec,0x6c] -; FMACALL32_BDVER2-NEXT: vmovaps %xmm0, 
{{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x44,0x24,0x50] -; FMACALL32_BDVER2-NEXT: vmovlhps %xmm1, %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0xc1] -; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0],xmm1[0] -; FMACALL32_BDVER2-NEXT: vmovaps %xmm2, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x54,0x24,0x30] -; FMACALL32_BDVER2-NEXT: vmovaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) ## 16-byte Spill -; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x29,0x4c,0x24,0x40] -; FMACALL32_BDVER2-NEXT: vmovlps %xmm2, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x13,0x54,0x24,0x10] -; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x30] -; FMACALL32_BDVER2-NEXT: vmovhps %xmm0, {{[0-9]+}}(%esp) ## encoding: [0xc5,0xf8,0x17,0x44,0x24,0x10] -; FMACALL32_BDVER2-NEXT: vmovaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 ## 16-byte Reload -; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x28,0x44,0x24,0x40] -; FMACALL32_BDVER2-NEXT: vmovlps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0, %xmm0 ## 16-byte Folded Reload -; FMACALL32_BDVER2-NEXT: ## encoding: [0xc5,0xf8,0x12,0x44,0x24,0x58] -; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0,1],xmm0[2,3] -; FMACALL32_BDVER2-NEXT: vmovups %xmm0, (%esp) ## encoding: [0xc5,0xf8,0x11,0x04,0x24] -; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x28] -; FMACALL32_BDVER2-NEXT: calll _fma ## encoding: [0xe8,A,A,A,A] -; FMACALL32_BDVER2-NEXT: ## fixup A - offset: 1, value: _fma-4, kind: FK_PCRel_4 -; FMACALL32_BDVER2-NEXT: fstpl {{[0-9]+}}(%esp) ## encoding: [0xdd,0x5c,0x24,0x20] -; FMACALL32_BDVER2-NEXT: vmovsd {{[0-9]+}}(%esp), %xmm0 
## encoding: [0xc5,0xfb,0x10,0x44,0x24,0x28] -; FMACALL32_BDVER2-NEXT: ## xmm0 = mem[0],zero -; FMACALL32_BDVER2-NEXT: vmovhps {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0xc5,0xf8,0x16,0x44,0x24,0x20] -; FMACALL32_BDVER2-NEXT: ## xmm0 = xmm0[0,1],mem[0,1] -; FMACALL32_BDVER2-NEXT: addl $108, %esp ## encoding: [0x83,0xc4,0x6c] -; FMACALL32_BDVER2-NEXT: retl ## encoding: [0xc3] %call = call reassoc <2 x double> @llvm.fma.v2f64(<2 x double> %a, <2 x double> %b, <2 x double> %c) ret <2 x double> %call } From 7dfff42f019583076c55993d30a63889613922dd Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Sun, 19 Jul 2020 10:50:51 -0400 Subject: [PATCH 752/771] Silencing some 'logical operation on address of string constant diagnostics; NFC --- clang-tools-extra/clangd/CompileCommands.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index f6210a43b34eb..6df29bd823196 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -354,7 +354,7 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { #define PREFIX(NAME, VALUE) static const char *const NAME[] = VALUE; #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELP, METAVAR, VALUES) \ - if (DriverID::OPT_##ALIAS != DriverID::OPT_INVALID && ALIASARGS == nullptr) \ + if (DriverID::OPT_##ALIAS != DriverID::OPT_INVALID && !ALIASARGS) \ AddAlias(DriverID::OPT_##ID, DriverID::OPT_##ALIAS); \ Prefixes[DriverID::OPT_##ID] = PREFIX; #include "clang/Driver/Options.inc" From b79ca34530e88c3a13a9c7afb1df42d7a622e3dd Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Sun, 19 Jul 2020 11:12:39 -0400 Subject: [PATCH 753/771] Revert 7dfff42f019583076c55993d30a63889613922dd as it broke non-Windows builds. 
--- clang-tools-extra/clangd/CompileCommands.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index 6df29bd823196..f6210a43b34eb 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -354,7 +354,7 @@ llvm::ArrayRef ArgStripper::rulesFor(llvm::StringRef Arg) { #define PREFIX(NAME, VALUE) static const char *const NAME[] = VALUE; #define OPTION(PREFIX, NAME, ID, KIND, GROUP, ALIAS, ALIASARGS, FLAGS, PARAM, \ HELP, METAVAR, VALUES) \ - if (DriverID::OPT_##ALIAS != DriverID::OPT_INVALID && !ALIASARGS) \ + if (DriverID::OPT_##ALIAS != DriverID::OPT_INVALID && ALIASARGS == nullptr) \ AddAlias(DriverID::OPT_##ID, DriverID::OPT_##ALIAS); \ Prefixes[DriverID::OPT_##ID] = PREFIX; #include "clang/Driver/Options.inc" From 97914164f8454e745219566d58479b5762cccd51 Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Sun, 19 Jul 2020 11:19:48 -0400 Subject: [PATCH 754/771] Silence a "logical operation on address of string constant" via CMake instead. 
--- clang-tools-extra/clangd/CMakeLists.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang-tools-extra/clangd/CMakeLists.txt b/clang-tools-extra/clangd/CMakeLists.txt index b3002b1d56981..8db6656e5291a 100644 --- a/clang-tools-extra/clangd/CMakeLists.txt +++ b/clang-tools-extra/clangd/CMakeLists.txt @@ -28,6 +28,10 @@ set(LLVM_LINK_COMPONENTS Option ) +if(MSVC AND NOT CLANG_CL) + set_source_files_properties(CompileCommands.cpp PROPERTIES COMPILE_FLAGS -wd4130) # disables C4130: logical operation on address of string constant +endif() + add_clang_library(clangDaemon AST.cpp ClangdLSPServer.cpp From 2d6ecfa168c2d36ac88efc854f19b05d1c540ded Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Fri, 17 Jul 2020 16:35:56 -0700 Subject: [PATCH 755/771] [InlineAdvisor] New inliner advisor to replay inlining from optimization remarks Summary: This change added a new inline advisor that takes optimization remarks from previous inlining as input, and provides the decision as advice so current inlining can replay inline decisions of a different compilation. Dwarf inline stack with line and discriminator is used as anchor for call sites including call context. The change can be useful for Inliner tuning as it provides a channel to allow external input for tweaking inline decisions. Existing alternatives like alwaysinline attribute is per-function, not per-callsite. Per-callsite inline intrinsic can be another solution (not yet existing), but it's intrusive to implement and also does not differentiate call context. A switch -sample-profile-inline-replay= is added to hook up the new inline advisor with SampleProfileLoader's inline decision for replay. Since SampleProfileLoader does top-down inlining, inline decision can be specialized for each call context, hence we should be able to replay inlining accurately. However with a bottom-up inliner like CGSCC inlining, the replay can be limited due to lack of specialization for different call context. 
Apart from that limitation, the new inline advisor can still be used by regular CGSCC inliner later if needed for tuning purpose. Subscribers: mgorny, aprantl, hiraditya, llvm-commits Tags: #llvm Resubmit for https://reviews.llvm.org/D84086 --- llvm/include/llvm/Analysis/InlineAdvisor.h | 3 + .../llvm/Analysis/ReplayInlineAdvisor.h | 37 ++++++ llvm/lib/Analysis/CMakeLists.txt | 1 + llvm/lib/Analysis/InlineAdvisor.cpp | 25 ++++ llvm/lib/Analysis/ReplayInlineAdvisor.cpp | 61 +++++++++ llvm/lib/Transforms/IPO/SampleProfile.cpp | 37 +++++- .../SampleProfile/Inputs/inline-replay.txt | 2 + .../Transforms/SampleProfile/inline-replay.ll | 122 ++++++++++++++++++ 8 files changed, 284 insertions(+), 4 deletions(-) create mode 100644 llvm/include/llvm/Analysis/ReplayInlineAdvisor.h create mode 100644 llvm/lib/Analysis/ReplayInlineAdvisor.cpp create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt create mode 100644 llvm/test/Transforms/SampleProfile/inline-replay.ll diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 3480d93385a8e..991c1455cc033 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -226,6 +226,9 @@ void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, bool ForProfileContext = false, const char *PassName = nullptr); +/// get call site location as string +std::string getCallSiteLocation(DebugLoc DLoc); + /// Add location info to ORE message. 
void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc); diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h new file mode 100644 index 0000000000000..e312d59a9f87b --- /dev/null +++ b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h @@ -0,0 +1,37 @@ +//===- ReplayInlineAdvisor.h - Replay Inline Advisor interface -*- C++ --*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +#ifndef LLVM_REPLAYINLINEADVISOR_H_ +#define LLVM_REPLAYINLINEADVISOR_H_ + +#include "llvm/ADT/StringSet.h" +#include "llvm/Analysis/InlineAdvisor.h" +#include "llvm/IR/LLVMContext.h" + +namespace llvm { +class BasicBlock; +class CallBase; +class Function; +class Module; +class OptimizationRemarkEmitter; + +/// Replay inline advisor that uses optimization remarks from inlining of +/// previous build to guide current inlining. This is useful for inliner tuning. 
+class ReplayInlineAdvisor : public InlineAdvisor { +public: + ReplayInlineAdvisor(FunctionAnalysisManager &FAM, LLVMContext &Context, + StringRef RemarksFile); + std::unique_ptr getAdvice(CallBase &CB) override; + bool areReplayRemarksLoaded() const { return HasReplayRemarks; } + +private: + StringSet<> InlineSitesFromRemarks; + bool HasReplayRemarks = false; +}; +} // namespace llvm +#endif // LLVM_REPLAYINLINEADVISOR_H_ \ No newline at end of file diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 703623396d96a..8f10bac588e52 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -117,6 +117,7 @@ add_llvm_component_library(LLVMAnalysis RegionInfo.cpp RegionPass.cpp RegionPrinter.cpp + ReplayInlineAdvisor.cpp ScalarEvolution.cpp ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionDivision.cpp diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index e18f681278d3a..497363a5cdf63 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -365,6 +365,31 @@ llvm::shouldInline(CallBase &CB, return IC; } +std::string llvm::getCallSiteLocation(DebugLoc DLoc) { + std::ostringstream CallSiteLoc; + bool First = true; + for (DILocation *DIL = DLoc.get(); DIL; DIL = DIL->getInlinedAt()) { + if (!First) + CallSiteLoc << " @ "; + // Note that negative line offset is actually possible, but we use + // unsigned int to match line offset representation in remarks so + // it's directly consumable by relay advisor. + uint32_t Offset = + DIL->getLine() - DIL->getScope()->getSubprogram()->getLine(); + uint32_t Discriminator = DIL->getBaseDiscriminator(); + StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); + if (Name.empty()) + Name = DIL->getScope()->getSubprogram()->getName(); + CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset); + if (Discriminator) { + CallSiteLoc << "." 
<< llvm::utostr(Discriminator); + } + First = false; + } + + return CallSiteLoc.str(); +} + void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) { if (!DLoc.get()) return; diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp new file mode 100644 index 0000000000000..c12b58021a606 --- /dev/null +++ b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp @@ -0,0 +1,61 @@ +//===- ReplayInlineAdvisor.cpp - Replay InlineAdvisor ---------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file implements ReplayInlineAdvisor that replays inline decision based +// on previous inline remarks from optimization remark log. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/InlineAdvisor.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" +#include "llvm/IR/DebugInfoMetadata.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/LineIterator.h" + +using namespace llvm; + +#define DEBUG_TYPE "inline-replay" + +ReplayInlineAdvisor::ReplayInlineAdvisor(FunctionAnalysisManager &FAM, + LLVMContext &Context, + StringRef RemarksFile) + : InlineAdvisor(FAM), HasReplayRemarks(false) { + auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile); + std::error_code EC = BufferOrErr.getError(); + if (EC) { + Context.emitError("Could not open remarks file: " + EC.message()); + return; + } + + // Example for inline remarks to parse: + // _Z3subii inlined into main [details] at callsite sum:1 @ main:3.1 + // We use the callsite string after `at callsite` to replay inlining. 
+ line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true); + for (; !LineIt.is_at_eof(); ++LineIt) { + StringRef Line = *LineIt; + auto Pair = Line.split(" at callsite "); + if (Pair.second.empty()) + continue; + InlineSitesFromRemarks.insert(Pair.second); + } + HasReplayRemarks = true; +} + +std::unique_ptr ReplayInlineAdvisor::getAdvice(CallBase &CB) { + assert(HasReplayRemarks); + + Function &Caller = *CB.getCaller(); + auto &ORE = FAM.getResult(Caller); + + if (InlineSitesFromRemarks.empty()) + return std::make_unique(this, CB, ORE, false); + + StringRef CallSiteLoc = getCallSiteLocation(CB.getDebugLoc()); + bool InlineRecommended = InlineSitesFromRemarks.count(CallSiteLoc) > 0; + return std::make_unique(this, CB, ORE, InlineRecommended); +} diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index b6871e260532d..7b5fc030cf88c 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -43,6 +43,7 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" @@ -170,6 +171,13 @@ static cl::opt SampleColdCallSiteThreshold( "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites")); +static cl::opt ProfileInlineReplayFile( + "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), + cl::desc( + "Optimization remarks file containing inline remarks to be replayed " + "by inlining from sample profile loader."), + cl::Hidden); + namespace { using BlockWeightMap = DenseMap; @@ -319,7 +327,7 @@ class SampleProfileLoader { RemappingFilename(std::string(RemapName)), IsThinLTOPreLink(IsThinLTOPreLink) {} - bool doInitialization(Module &M); + bool 
doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); bool runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG); @@ -473,6 +481,9 @@ class SampleProfileLoader { // overriden by -profile-sample-accurate or profile-sample-accurate // attribute. bool ProfAccForSymsInList; + + // External inline advisor used to replay inline decision from remarks. + std::unique_ptr ExternalInlineAdvisor; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -898,6 +909,16 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { } bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { + if (ExternalInlineAdvisor) { + auto Advice = ExternalInlineAdvisor->getAdvice(CB); + if (!Advice->isInliningRecommended()) { + Advice->recordUnattemptedInlining(); + return false; + } + // Dummy record, we don't use it for replay. + Advice->recordInlining(); + } + Function *CalledFunction = CB.getCalledFunction(); assert(CalledFunction); DebugLoc DLoc = CB.getDebugLoc(); @@ -1005,7 +1026,7 @@ bool SampleProfileLoader::inlineHotFunctions( } } } - if (Hot) { + if (Hot || ExternalInlineAdvisor) { CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true); } else { @@ -1818,7 +1839,8 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { return FunctionOrderList; } -bool SampleProfileLoader::doInitialization(Module &M) { +bool SampleProfileLoader::doInitialization(Module &M, + FunctionAnalysisManager *FAM) { auto &Ctx = M.getContext(); std::unique_ptr RemapReader; @@ -1843,6 +1865,13 @@ bool SampleProfileLoader::doInitialization(Module &M) { NamesInProfile.insert(NameTable->begin(), NameTable->end()); } + if (FAM && !ProfileInlineReplayFile.empty()) { + ExternalInlineAdvisor = std::make_unique( + *FAM, Ctx, ProfileInlineReplayFile); + if (!ExternalInlineAdvisor->areReplayRemarksLoaded()) + ExternalInlineAdvisor.reset(); + } + 
return true; } @@ -1995,7 +2024,7 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, : ProfileRemappingFileName, IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI); - if (!SampleLoader.doInitialization(M)) + if (!SampleLoader.doInitialization(M, &FAM)) return PreservedAnalyses::all(); ProfileSummaryInfo *PSI = &AM.getResult(M); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt new file mode 100644 index 0000000000000..6842845d56554 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt @@ -0,0 +1,2 @@ +remark: calls.cc:10:0: _Z3sumii inlined into main to match profiling context with (cost=45, threshold=337) at callsite main:3.1 +remark: calls.cc:4:0: _Z3subii inlined into main to match profiling context with (cost=-5, threshold=337) at callsite _Z3sumii:1 @ main:3.1 diff --git a/llvm/test/Transforms/SampleProfile/inline-replay.ll b/llvm/test/Transforms/SampleProfile/inline-replay.ll new file mode 100644 index 0000000000000..ecf6f51850f26 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/inline-replay.ll @@ -0,0 +1,122 @@ +;; Note that this needs new pass manager for now. Passing `-sample-profile-inline-replay` to legacy pass manager is a no-op. 
+ +;; Check baseline inline decisions +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT %s + +;; Check replay inline decisions +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-inline-replay=%S/Inputs/inline-replay.txt -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s + +@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 + +define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %tmp = load i32, i32* %x.addr, align 4, !dbg !8 + %tmp1 = load i32, i32* %y.addr, align 4, !dbg !8 + %add = add nsw i32 %tmp, %tmp1, !dbg !8 + %tmp2 = load i32, i32* %x.addr, align 4, !dbg !8 + %tmp3 = load i32, i32* %y.addr, align 4, !dbg !8 + %call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8 + ret i32 %add, !dbg !8 +} + +define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 { +entry: + %x.addr = alloca i32, align 4 + %y.addr = alloca i32, align 4 + store i32 %x, i32* %x.addr, align 4 + store i32 %y, i32* %y.addr, align 4 + %tmp = load i32, i32* %x.addr, align 4, !dbg !10 + %tmp1 = load i32, i32* %y.addr, align 4, !dbg !10 + %add = sub nsw i32 %tmp, %tmp1, !dbg !10 + ret i32 %add, !dbg !11 +} + +define i32 @main() #0 !dbg !12 { +entry: + %retval = alloca i32, align 4 + %s = alloca i32, align 4 + %i = alloca i32, align 4 + store i32 0, i32* %retval + store i32 0, i32* %i, align 4, !dbg !13 + br label %while.cond, !dbg !14 + +while.cond: ; preds = %if.end, %entry + %tmp = load i32, i32* %i, align 4, !dbg !15 + %inc = add nsw i32 %tmp, 1, !dbg !15 + store i32 %inc, i32* %i, align 4, !dbg !15 + %cmp = icmp slt i32 %tmp, 400000000, !dbg 
!15 + br i1 %cmp, label %while.body, label %while.end, !dbg !15 + +while.body: ; preds = %while.cond + %tmp1 = load i32, i32* %i, align 4, !dbg !17 + %cmp1 = icmp ne i32 %tmp1, 100, !dbg !17 + br i1 %cmp1, label %if.then, label %if.else, !dbg !17 + +if.then: ; preds = %while.body + %tmp2 = load i32, i32* %i, align 4, !dbg !19 + %tmp3 = load i32, i32* %s, align 4, !dbg !19 + %call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19 + store i32 %call, i32* %s, align 4, !dbg !19 + br label %if.end, !dbg !19 + +if.else: ; preds = %while.body + store i32 30, i32* %s, align 4, !dbg !21 + br label %if.end + +if.end: ; preds = %if.else, %if.then + br label %while.cond, !dbg !23 + +while.end: ; preds = %while.cond + %tmp4 = load i32, i32* %s, align 4, !dbg !25 + %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25 + ret i32 0, !dbg !26 +} + +declare i32 @printf(i8*, ...) + +attributes #0 = { "use-sample-profile" } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!3, !4} +!llvm.ident = !{!5} + +!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2) +!1 = !DIFile(filename: "calls.cc", directory: ".") +!2 = !{} +!3 = !{i32 2, !"Dwarf Version", i32 4} +!4 = !{i32 1, !"Debug Info Version", i32 3} +!5 = !{!"clang version 3.5 "} +!6 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!7 = !DISubroutineType(types: !2) +!8 = !DILocation(line: 4, scope: !6) +!9 = distinct !DISubprogram(name: "sub", linkageName: "_Z3subii", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!10 = 
!DILocation(line: 20, scope: !9) +!11 = !DILocation(line: 21, scope: !9) +!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) +!13 = !DILocation(line: 8, scope: !12) +!14 = !DILocation(line: 9, scope: !12) +!15 = !DILocation(line: 9, scope: !16) +!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2) +!17 = !DILocation(line: 10, scope: !18) +!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10) +!19 = !DILocation(line: 10, scope: !20) +!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2) +!21 = !DILocation(line: 10, scope: !22) +!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4) +!23 = !DILocation(line: 10, scope: !24) +!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6) +!25 = !DILocation(line: 11, scope: !12) +!26 = !DILocation(line: 12, scope: !12) + + +; DEFAULT: _Z3sumii inlined into main +; DEFAULT: _Z3subii inlined into _Z3sumii +; DEFAULT-NOT: _Z3subii inlined into main + +; REPLAY: _Z3sumii inlined into main +; REPLAY: _Z3subii inlined into main +; REPLA-NOT: _Z3subii inlined into _Z3sumii From d41d952be9773ad79fe2563074c612c1f810b83c Mon Sep 17 00:00:00 2001 From: Wenlei He Date: Sun, 19 Jul 2020 08:49:04 -0700 Subject: [PATCH 756/771] Revert "[InlineAdvisor] New inliner advisor to replay inlining from optimization remarks" This reverts commit 2d6ecfa168c2d36ac88efc854f19b05d1c540ded. 
--- llvm/include/llvm/Analysis/InlineAdvisor.h | 3 - .../llvm/Analysis/ReplayInlineAdvisor.h | 37 ------ llvm/lib/Analysis/CMakeLists.txt | 1 - llvm/lib/Analysis/InlineAdvisor.cpp | 25 ---- llvm/lib/Analysis/ReplayInlineAdvisor.cpp | 61 --------- llvm/lib/Transforms/IPO/SampleProfile.cpp | 37 +----- .../SampleProfile/Inputs/inline-replay.txt | 2 - .../Transforms/SampleProfile/inline-replay.ll | 122 ------------------ 8 files changed, 4 insertions(+), 284 deletions(-) delete mode 100644 llvm/include/llvm/Analysis/ReplayInlineAdvisor.h delete mode 100644 llvm/lib/Analysis/ReplayInlineAdvisor.cpp delete mode 100644 llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt delete mode 100644 llvm/test/Transforms/SampleProfile/inline-replay.ll diff --git a/llvm/include/llvm/Analysis/InlineAdvisor.h b/llvm/include/llvm/Analysis/InlineAdvisor.h index 991c1455cc033..3480d93385a8e 100644 --- a/llvm/include/llvm/Analysis/InlineAdvisor.h +++ b/llvm/include/llvm/Analysis/InlineAdvisor.h @@ -226,9 +226,6 @@ void emitInlinedInto(OptimizationRemarkEmitter &ORE, DebugLoc DLoc, bool ForProfileContext = false, const char *PassName = nullptr); -/// get call site location as string -std::string getCallSiteLocation(DebugLoc DLoc); - /// Add location info to ORE message. void addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc); diff --git a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h b/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h deleted file mode 100644 index e312d59a9f87b..0000000000000 --- a/llvm/include/llvm/Analysis/ReplayInlineAdvisor.h +++ /dev/null @@ -1,37 +0,0 @@ -//===- ReplayInlineAdvisor.h - Replay Inline Advisor interface -*- C++ --*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. 
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -#ifndef LLVM_REPLAYINLINEADVISOR_H_ -#define LLVM_REPLAYINLINEADVISOR_H_ - -#include "llvm/ADT/StringSet.h" -#include "llvm/Analysis/InlineAdvisor.h" -#include "llvm/IR/LLVMContext.h" - -namespace llvm { -class BasicBlock; -class CallBase; -class Function; -class Module; -class OptimizationRemarkEmitter; - -/// Replay inline advisor that uses optimization remarks from inlining of -/// previous build to guide current inlining. This is useful for inliner tuning. -class ReplayInlineAdvisor : public InlineAdvisor { -public: - ReplayInlineAdvisor(FunctionAnalysisManager &FAM, LLVMContext &Context, - StringRef RemarksFile); - std::unique_ptr getAdvice(CallBase &CB) override; - bool areReplayRemarksLoaded() const { return HasReplayRemarks; } - -private: - StringSet<> InlineSitesFromRemarks; - bool HasReplayRemarks = false; -}; -} // namespace llvm -#endif // LLVM_REPLAYINLINEADVISOR_H_ \ No newline at end of file diff --git a/llvm/lib/Analysis/CMakeLists.txt b/llvm/lib/Analysis/CMakeLists.txt index 8f10bac588e52..703623396d96a 100644 --- a/llvm/lib/Analysis/CMakeLists.txt +++ b/llvm/lib/Analysis/CMakeLists.txt @@ -117,7 +117,6 @@ add_llvm_component_library(LLVMAnalysis RegionInfo.cpp RegionPass.cpp RegionPrinter.cpp - ReplayInlineAdvisor.cpp ScalarEvolution.cpp ScalarEvolutionAliasAnalysis.cpp ScalarEvolutionDivision.cpp diff --git a/llvm/lib/Analysis/InlineAdvisor.cpp b/llvm/lib/Analysis/InlineAdvisor.cpp index 497363a5cdf63..e18f681278d3a 100644 --- a/llvm/lib/Analysis/InlineAdvisor.cpp +++ b/llvm/lib/Analysis/InlineAdvisor.cpp @@ -365,31 +365,6 @@ llvm::shouldInline(CallBase &CB, return IC; } -std::string llvm::getCallSiteLocation(DebugLoc DLoc) { - std::ostringstream CallSiteLoc; - bool First = true; - for (DILocation *DIL = DLoc.get(); DIL; DIL = DIL->getInlinedAt()) { - if (!First) - CallSiteLoc << " @ "; - // Note 
that negative line offset is actually possible, but we use - // unsigned int to match line offset representation in remarks so - // it's directly consumable by relay advisor. - uint32_t Offset = - DIL->getLine() - DIL->getScope()->getSubprogram()->getLine(); - uint32_t Discriminator = DIL->getBaseDiscriminator(); - StringRef Name = DIL->getScope()->getSubprogram()->getLinkageName(); - if (Name.empty()) - Name = DIL->getScope()->getSubprogram()->getName(); - CallSiteLoc << Name.str() << ":" << llvm::utostr(Offset); - if (Discriminator) { - CallSiteLoc << "." << llvm::utostr(Discriminator); - } - First = false; - } - - return CallSiteLoc.str(); -} - void llvm::addLocationToRemarks(OptimizationRemark &Remark, DebugLoc DLoc) { if (!DLoc.get()) return; diff --git a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp b/llvm/lib/Analysis/ReplayInlineAdvisor.cpp deleted file mode 100644 index c12b58021a606..0000000000000 --- a/llvm/lib/Analysis/ReplayInlineAdvisor.cpp +++ /dev/null @@ -1,61 +0,0 @@ -//===- ReplayInlineAdvisor.cpp - Replay InlineAdvisor ---------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -// -// This file implements ReplayInlineAdvisor that replays inline decision based -// on previous inline remarks from optimization remark log. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/InlineAdvisor.h" -#include "llvm/Analysis/ReplayInlineAdvisor.h" -#include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/LineIterator.h" - -using namespace llvm; - -#define DEBUG_TYPE "inline-replay" - -ReplayInlineAdvisor::ReplayInlineAdvisor(FunctionAnalysisManager &FAM, - LLVMContext &Context, - StringRef RemarksFile) - : InlineAdvisor(FAM), HasReplayRemarks(false) { - auto BufferOrErr = MemoryBuffer::getFileOrSTDIN(RemarksFile); - std::error_code EC = BufferOrErr.getError(); - if (EC) { - Context.emitError("Could not open remarks file: " + EC.message()); - return; - } - - // Example for inline remarks to parse: - // _Z3subii inlined into main [details] at callsite sum:1 @ main:3.1 - // We use the callsite string after `at callsite` to replay inlining. - line_iterator LineIt(*BufferOrErr.get(), /*SkipBlanks=*/true); - for (; !LineIt.is_at_eof(); ++LineIt) { - StringRef Line = *LineIt; - auto Pair = Line.split(" at callsite "); - if (Pair.second.empty()) - continue; - InlineSitesFromRemarks.insert(Pair.second); - } - HasReplayRemarks = true; -} - -std::unique_ptr ReplayInlineAdvisor::getAdvice(CallBase &CB) { - assert(HasReplayRemarks); - - Function &Caller = *CB.getCaller(); - auto &ORE = FAM.getResult(Caller); - - if (InlineSitesFromRemarks.empty()) - return std::make_unique(this, CB, ORE, false); - - StringRef CallSiteLoc = getCallSiteLocation(CB.getDebugLoc()); - bool InlineRecommended = InlineSitesFromRemarks.count(CallSiteLoc) > 0; - return std::make_unique(this, CB, ORE, InlineRecommended); -} diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index 7b5fc030cf88c..b6871e260532d 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -43,7 +43,6 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" 
#include "llvm/Analysis/PostDominators.h" #include "llvm/Analysis/ProfileSummaryInfo.h" -#include "llvm/Analysis/ReplayInlineAdvisor.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/BasicBlock.h" @@ -171,13 +170,6 @@ static cl::opt SampleColdCallSiteThreshold( "sample-profile-cold-inline-threshold", cl::Hidden, cl::init(45), cl::desc("Threshold for inlining cold callsites")); -static cl::opt ProfileInlineReplayFile( - "sample-profile-inline-replay", cl::init(""), cl::value_desc("filename"), - cl::desc( - "Optimization remarks file containing inline remarks to be replayed " - "by inlining from sample profile loader."), - cl::Hidden); - namespace { using BlockWeightMap = DenseMap; @@ -327,7 +319,7 @@ class SampleProfileLoader { RemappingFilename(std::string(RemapName)), IsThinLTOPreLink(IsThinLTOPreLink) {} - bool doInitialization(Module &M, FunctionAnalysisManager *FAM = nullptr); + bool doInitialization(Module &M); bool runOnModule(Module &M, ModuleAnalysisManager *AM, ProfileSummaryInfo *_PSI, CallGraph *CG); @@ -481,9 +473,6 @@ class SampleProfileLoader { // overriden by -profile-sample-accurate or profile-sample-accurate // attribute. bool ProfAccForSymsInList; - - // External inline advisor used to replay inline decision from remarks. - std::unique_ptr ExternalInlineAdvisor; }; class SampleProfileLoaderLegacyPass : public ModulePass { @@ -909,16 +898,6 @@ SampleProfileLoader::findFunctionSamples(const Instruction &Inst) const { } bool SampleProfileLoader::inlineCallInstruction(CallBase &CB) { - if (ExternalInlineAdvisor) { - auto Advice = ExternalInlineAdvisor->getAdvice(CB); - if (!Advice->isInliningRecommended()) { - Advice->recordUnattemptedInlining(); - return false; - } - // Dummy record, we don't use it for replay. 
- Advice->recordInlining(); - } - Function *CalledFunction = CB.getCalledFunction(); assert(CalledFunction); DebugLoc DLoc = CB.getDebugLoc(); @@ -1026,7 +1005,7 @@ bool SampleProfileLoader::inlineHotFunctions( } } } - if (Hot || ExternalInlineAdvisor) { + if (Hot) { CIS.insert(CIS.begin(), AllCandidates.begin(), AllCandidates.end()); emitOptimizationRemarksForInlineCandidates(AllCandidates, F, true); } else { @@ -1839,8 +1818,7 @@ SampleProfileLoader::buildFunctionOrder(Module &M, CallGraph *CG) { return FunctionOrderList; } -bool SampleProfileLoader::doInitialization(Module &M, - FunctionAnalysisManager *FAM) { +bool SampleProfileLoader::doInitialization(Module &M) { auto &Ctx = M.getContext(); std::unique_ptr RemapReader; @@ -1865,13 +1843,6 @@ bool SampleProfileLoader::doInitialization(Module &M, NamesInProfile.insert(NameTable->begin(), NameTable->end()); } - if (FAM && !ProfileInlineReplayFile.empty()) { - ExternalInlineAdvisor = std::make_unique( - *FAM, Ctx, ProfileInlineReplayFile); - if (!ExternalInlineAdvisor->areReplayRemarksLoaded()) - ExternalInlineAdvisor.reset(); - } - return true; } @@ -2024,7 +1995,7 @@ PreservedAnalyses SampleProfileLoaderPass::run(Module &M, : ProfileRemappingFileName, IsThinLTOPreLink, GetAssumptionCache, GetTTI, GetTLI); - if (!SampleLoader.doInitialization(M, &FAM)) + if (!SampleLoader.doInitialization(M)) return PreservedAnalyses::all(); ProfileSummaryInfo *PSI = &AM.getResult(M); diff --git a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt b/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt deleted file mode 100644 index 6842845d56554..0000000000000 --- a/llvm/test/Transforms/SampleProfile/Inputs/inline-replay.txt +++ /dev/null @@ -1,2 +0,0 @@ -remark: calls.cc:10:0: _Z3sumii inlined into main to match profiling context with (cost=45, threshold=337) at callsite main:3.1 -remark: calls.cc:4:0: _Z3subii inlined into main to match profiling context with (cost=-5, threshold=337) at callsite _Z3sumii:1 @ 
main:3.1 diff --git a/llvm/test/Transforms/SampleProfile/inline-replay.ll b/llvm/test/Transforms/SampleProfile/inline-replay.ll deleted file mode 100644 index ecf6f51850f26..0000000000000 --- a/llvm/test/Transforms/SampleProfile/inline-replay.ll +++ /dev/null @@ -1,122 +0,0 @@ -;; Note that this needs new pass manager for now. Passing `-sample-profile-inline-replay` to legacy pass manager is a no-op. - -;; Check baseline inline decisions -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=DEFAULT %s - -;; Check replay inline decisions -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline-topdown.prof -sample-profile-inline-replay=%S/Inputs/inline-replay.txt -sample-profile-merge-inlinee -sample-profile-top-down-load -pass-remarks=inline -S 2>&1 | FileCheck -check-prefix=REPLAY %s - -@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1 - -define i32 @_Z3sumii(i32 %x, i32 %y) #0 !dbg !6 { -entry: - %x.addr = alloca i32, align 4 - %y.addr = alloca i32, align 4 - store i32 %x, i32* %x.addr, align 4 - store i32 %y, i32* %y.addr, align 4 - %tmp = load i32, i32* %x.addr, align 4, !dbg !8 - %tmp1 = load i32, i32* %y.addr, align 4, !dbg !8 - %add = add nsw i32 %tmp, %tmp1, !dbg !8 - %tmp2 = load i32, i32* %x.addr, align 4, !dbg !8 - %tmp3 = load i32, i32* %y.addr, align 4, !dbg !8 - %call = call i32 @_Z3subii(i32 %tmp2, i32 %tmp3), !dbg !8 - ret i32 %add, !dbg !8 -} - -define i32 @_Z3subii(i32 %x, i32 %y) #0 !dbg !9 { -entry: - %x.addr = alloca i32, align 4 - %y.addr = alloca i32, align 4 - store i32 %x, i32* %x.addr, align 4 - store i32 %y, i32* %y.addr, align 4 - %tmp = load i32, i32* %x.addr, align 4, !dbg !10 - %tmp1 = load i32, i32* %y.addr, align 4, !dbg !10 - %add = sub nsw i32 %tmp, %tmp1, !dbg !10 - ret i32 %add, !dbg !11 -} - -define i32 @main() #0 !dbg !12 { -entry: 
- %retval = alloca i32, align 4 - %s = alloca i32, align 4 - %i = alloca i32, align 4 - store i32 0, i32* %retval - store i32 0, i32* %i, align 4, !dbg !13 - br label %while.cond, !dbg !14 - -while.cond: ; preds = %if.end, %entry - %tmp = load i32, i32* %i, align 4, !dbg !15 - %inc = add nsw i32 %tmp, 1, !dbg !15 - store i32 %inc, i32* %i, align 4, !dbg !15 - %cmp = icmp slt i32 %tmp, 400000000, !dbg !15 - br i1 %cmp, label %while.body, label %while.end, !dbg !15 - -while.body: ; preds = %while.cond - %tmp1 = load i32, i32* %i, align 4, !dbg !17 - %cmp1 = icmp ne i32 %tmp1, 100, !dbg !17 - br i1 %cmp1, label %if.then, label %if.else, !dbg !17 - -if.then: ; preds = %while.body - %tmp2 = load i32, i32* %i, align 4, !dbg !19 - %tmp3 = load i32, i32* %s, align 4, !dbg !19 - %call = call i32 @_Z3sumii(i32 %tmp2, i32 %tmp3), !dbg !19 - store i32 %call, i32* %s, align 4, !dbg !19 - br label %if.end, !dbg !19 - -if.else: ; preds = %while.body - store i32 30, i32* %s, align 4, !dbg !21 - br label %if.end - -if.end: ; preds = %if.else, %if.then - br label %while.cond, !dbg !23 - -while.end: ; preds = %while.cond - %tmp4 = load i32, i32* %s, align 4, !dbg !25 - %call2 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i32 0, i32 0), i32 %tmp4), !dbg !25 - ret i32 0, !dbg !26 -} - -declare i32 @printf(i8*, ...) 
- -attributes #0 = { "use-sample-profile" } - -!llvm.dbg.cu = !{!0} -!llvm.module.flags = !{!3, !4} -!llvm.ident = !{!5} - -!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.5 ", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2) -!1 = !DIFile(filename: "calls.cc", directory: ".") -!2 = !{} -!3 = !{i32 2, !"Dwarf Version", i32 4} -!4 = !{i32 1, !"Debug Info Version", i32 3} -!5 = !{!"clang version 3.5 "} -!6 = distinct !DISubprogram(name: "sum", linkageName: "_Z3sumii", scope: !1, file: !1, line: 3, type: !7, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -!7 = !DISubroutineType(types: !2) -!8 = !DILocation(line: 4, scope: !6) -!9 = distinct !DISubprogram(name: "sub", linkageName: "_Z3subii", scope: !1, file: !1, line: 20, type: !7, scopeLine: 20, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -!10 = !DILocation(line: 20, scope: !9) -!11 = !DILocation(line: 21, scope: !9) -!12 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !7, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !0, retainedNodes: !2) -!13 = !DILocation(line: 8, scope: !12) -!14 = !DILocation(line: 9, scope: !12) -!15 = !DILocation(line: 9, scope: !16) -!16 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 2) -!17 = !DILocation(line: 10, scope: !18) -!18 = distinct !DILexicalBlock(scope: !12, file: !1, line: 10) -!19 = !DILocation(line: 10, scope: !20) -!20 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 2) -!21 = !DILocation(line: 10, scope: !22) -!22 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 4) -!23 = !DILocation(line: 10, scope: !24) -!24 = !DILexicalBlockFile(scope: !18, file: !1, discriminator: 6) -!25 = !DILocation(line: 11, scope: !12) -!26 = !DILocation(line: 
12, scope: !12) - - -; DEFAULT: _Z3sumii inlined into main -; DEFAULT: _Z3subii inlined into _Z3sumii -; DEFAULT-NOT: _Z3subii inlined into main - -; REPLAY: _Z3sumii inlined into main -; REPLAY: _Z3subii inlined into main -; REPLA-NOT: _Z3subii inlined into _Z3sumii From 13316a77053514be552a0dad932e3455413b4f82 Mon Sep 17 00:00:00 2001 From: Bruno Ricci Date: Sun, 19 Jul 2020 16:53:59 +0100 Subject: [PATCH 757/771] [clang] Disable a few formatting options for test/ Hopefully this will make the bot a little less noisy. Rationale for each: AlignTrailingComments: We don't want to force-align the various expected-error and friends. CommentPragmas: Tell clang-format to leave the "// CHECK:" and the "// expected-" alone. AlwaysBreakTemplateDeclarations: Templates in tests often have no break between the template-head and the declaration. Differential Revision: https://reviews.llvm.org/D83901 --- clang/test/.clang-format | 3 +++ 1 file changed, 3 insertions(+) diff --git a/clang/test/.clang-format b/clang/test/.clang-format index 4799b66f3e9a6..a6176c2e00131 100644 --- a/clang/test/.clang-format +++ b/clang/test/.clang-format @@ -1,2 +1,5 @@ BasedOnStyle: LLVM ColumnLimit: 0 +AlignTrailingComments: false +CommentPragmas: "(^ ?CHECK|^ ?expected-)" +AlwaysBreakTemplateDeclarations: No From 89ff9bf061b4985d11cd4785958d8f8156d10f5d Mon Sep 17 00:00:00 2001 From: Bruno Ricci Date: Sun, 19 Jul 2020 17:08:17 +0100 Subject: [PATCH 758/771] [clang] Fix the warning for a non-void consteval function without a return value to actually say "consteval". This warning was modified in 796ed03b8412 to use the term "consteval" for consteval functions. However the warning has never worked as intended since the diagnostic's arguments are used in the wrong order. This was unfortunately missed by 796ed03b8412 since no test did exercise this specific warning. 
Additionally send the NamedDecl* into the diagnostic instead of just the IdentifierInfo* to correctly work with special names and template arguments. --- clang/lib/Sema/SemaStmt.cpp | 25 ++++++++++--------- .../SemaCXX/constant-expression-cxx11.cpp | 2 +- clang/test/SemaCXX/consteval-return-void.cpp | 20 +++++++++++---- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/clang/lib/Sema/SemaStmt.cpp b/clang/lib/Sema/SemaStmt.cpp index 73f3183c163f3..948c187804dcc 100644 --- a/clang/lib/Sema/SemaStmt.cpp +++ b/clang/lib/Sema/SemaStmt.cpp @@ -3766,25 +3766,26 @@ StmtResult Sema::BuildReturnStmt(SourceLocation ReturnLoc, Expr *RetValExp) { } else if (!RetValExp && !HasDependentReturnType) { FunctionDecl *FD = getCurFunctionDecl(); - unsigned DiagID; if (getLangOpts().CPlusPlus11 && FD && FD->isConstexpr()) { // C++11 [stmt.return]p2 - DiagID = diag::err_constexpr_return_missing_expr; + Diag(ReturnLoc, diag::err_constexpr_return_missing_expr) + << FD << FD->isConsteval(); FD->setInvalidDecl(); - } else if (getLangOpts().C99) { - // C99 6.8.6.4p1 (ext_ since GCC warns) - DiagID = diag::ext_return_missing_expr; } else { + // C99 6.8.6.4p1 (ext_ since GCC warns) // C90 6.6.6.4p4 - DiagID = diag::warn_return_missing_expr; + unsigned DiagID = getLangOpts().C99 ? diag::ext_return_missing_expr + : diag::warn_return_missing_expr; + // Note that at this point one of getCurFunctionDecl() or + // getCurMethodDecl() must be non-null (see above). + assert((getCurFunctionDecl() || getCurMethodDecl()) && + "Not in a FunctionDecl or ObjCMethodDecl?"); + bool IsMethod = FD == nullptr; + const NamedDecl *ND = + IsMethod ? 
cast(getCurMethodDecl()) : cast(FD); + Diag(ReturnLoc, DiagID) << ND << IsMethod; } - if (FD) - Diag(ReturnLoc, DiagID) - << FD->getIdentifier() << 0 /*fn*/ << FD->isConsteval(); - else - Diag(ReturnLoc, DiagID) << getCurMethodDecl()->getDeclName() << 1/*meth*/; - Result = ReturnStmt::Create(Context, ReturnLoc, /* RetExpr=*/nullptr, /* NRVOCandidate=*/nullptr); } else { diff --git a/clang/test/SemaCXX/constant-expression-cxx11.cpp b/clang/test/SemaCXX/constant-expression-cxx11.cpp index 7ff260c37c698..eac0256c4fb21 100644 --- a/clang/test/SemaCXX/constant-expression-cxx11.cpp +++ b/clang/test/SemaCXX/constant-expression-cxx11.cpp @@ -2171,7 +2171,7 @@ namespace PR21859 { template constexpr int FunT1() { return; } // expected-error {{non-void constexpr function 'FunT1' should return a value}} template constexpr int FunT2() { return 0; } template <> constexpr int FunT2() { return 0; } - template <> constexpr int FunT2() { return; } // expected-error {{non-void constexpr function 'FunT2' should return a value}} + template <> constexpr int FunT2() { return; } // expected-error {{non-void constexpr function 'FunT2' should return a value}} } struct InvalidRedef { diff --git a/clang/test/SemaCXX/consteval-return-void.cpp b/clang/test/SemaCXX/consteval-return-void.cpp index a5207f41bf2c7..39e1418306f50 100644 --- a/clang/test/SemaCXX/consteval-return-void.cpp +++ b/clang/test/SemaCXX/consteval-return-void.cpp @@ -1,10 +1,20 @@ // RUN: %clang_cc1 -std=c++20 -fsyntax-only -verify %s -consteval int Fun() { return; } // expected-error {{non-void constexpr function 'Fun' should return a value}} +consteval int Fun() { return; } // expected-error {{non-void consteval function 'Fun' should return a value}} -// FIXME: The diagnostic is wrong; should be "consteval". 
- -template consteval int FunT1() { return; } // expected-error {{non-void constexpr function 'FunT1' should return a value}} +template consteval int FunT1() { return; } // expected-error {{non-void consteval function 'FunT1' should return a value}} template consteval int FunT2() { return 0; } template <> consteval int FunT2() { return 0; } -template <> consteval int FunT2() { return; } // expected-error {{non-void constexpr function 'FunT2' should return a value}} +template <> consteval int FunT2() { return; } // expected-error {{non-void consteval function 'FunT2' should return a value}} + +enum E {}; + +constexpr E operator+(E,E) { return; } // expected-error {{non-void constexpr function 'operator+' should return a value}} +consteval E operator+(E,E) { return; } // expected-error {{non-void consteval function 'operator+' should return a value}} +template constexpr E operator-(E,E) { return; } // expected-error {{non-void constexpr function 'operator-' should return a value}} +template consteval E operator-(E,E) { return; } // expected-error {{non-void consteval function 'operator-' should return a value}} + +template constexpr E operator*(E,E); +template consteval E operator/(E,E); +template <> constexpr E operator*(E,E) { return; } // expected-error {{non-void constexpr function 'operator*' should return a value}} +template <> consteval E operator/(E,E) { return; } // expected-error {{non-void consteval function 'operator/' should return a value}} From 0a6aee51608df8502d1d20746d011b3024230c9a Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Mon, 20 Jul 2020 01:23:58 +0900 Subject: [PATCH 759/771] [ValueTracking] Add canCreateUndefOrPoison & let canCreatePoison use Operator This patch - adds `canCreateUndefOrPoison` - refactors `canCreatePoison` so it can deal with constantexprs `canCreateUndefOrPoison` will be used at D83926. 
Reviewed By: nikic, jdoerfert Differential Revision: https://reviews.llvm.org/D84007 --- llvm/include/llvm/Analysis/ValueTracking.h | 24 ++-- llvm/lib/Analysis/ValueTracking.cpp | 70 +++++++---- .../Instrumentation/PoisonChecking.cpp | 2 +- llvm/unittests/Analysis/ValueTrackingTest.cpp | 114 ++++++++++-------- 4 files changed, 127 insertions(+), 83 deletions(-) diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h index 9510739ef5ab4..178f61563cd7f 100644 --- a/llvm/include/llvm/Analysis/ValueTracking.h +++ b/llvm/include/llvm/Analysis/ValueTracking.h @@ -21,6 +21,7 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/InstrTypes.h" #include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" #include #include @@ -591,18 +592,25 @@ class Value; /// the parent of I. bool programUndefinedIfPoison(const Instruction *PoisonI); - /// Return true if I can create poison from non-poison operands. - /// For vectors, canCreatePoison returns true if there is potential poison in - /// any element of the result when vectors without poison are given as + /// canCreateUndefOrPoison returns true if Op can create undef or poison from + /// non-undef & non-poison operands. + /// For vectors, canCreateUndefOrPoison returns true if there is potential + /// poison or undef in any element of the result when vectors without + /// undef/poison poison are given as operands. + /// For example, given `Op = shl <2 x i32> %x, <0, 32>`, this function returns + /// true. If Op raises immediate UB but never creates poison or undef + /// (e.g. sdiv I, 0), canCreatePoison returns false. + /// + /// canCreatePoison returns true if Op can create poison from non-poison /// operands. - /// For example, given `I = shl <2 x i32> %x, <0, 32>`, this function returns - /// true. If I raises immediate UB but never creates poison (e.g. sdiv I, 0), - /// canCreatePoison returns false. 
- bool canCreatePoison(const Instruction *I); + bool canCreateUndefOrPoison(const Operator *Op); + bool canCreatePoison(const Operator *Op); /// Return true if this function can prove that V is never undef value /// or poison value. - // + /// Note that this is different from canCreateUndefOrPoison because the + /// function assumes Op's operands are not poison/undef. + /// /// If CtxI and DT are specified this method performs flow-sensitive analysis /// and returns true if it is guaranteed to be never undef or poison /// immediately before the CtxI. diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 8d7bb1805a57d..380022c10acec 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -4665,31 +4665,30 @@ bool llvm::isOverflowIntrinsicNoWrap(const WithOverflowInst *WO, return llvm::any_of(GuardingBranches, AllUsesGuardedByBranch); } -bool llvm::canCreatePoison(const Instruction *I) { +static bool canCreateUndefOrPoison(const Operator *Op, bool PoisonOnly) { // See whether I has flags that may create poison - if (isa(I) && - (I->hasNoSignedWrap() || I->hasNoUnsignedWrap())) - return true; - if (isa(I) && I->isExact()) - return true; - if (auto *FP = dyn_cast(I)) { + if (const auto *OvOp = dyn_cast(Op)) { + if (OvOp->hasNoSignedWrap() || OvOp->hasNoUnsignedWrap()) + return true; + } + if (const auto *ExactOp = dyn_cast(Op)) + if (ExactOp->isExact()) + return true; + if (const auto *FP = dyn_cast(Op)) { auto FMF = FP->getFastMathFlags(); if (FMF.noNaNs() || FMF.noInfs()) return true; } - if (auto *GEP = dyn_cast(I)) - if (GEP->isInBounds()) - return true; - unsigned Opcode = I->getOpcode(); + unsigned Opcode = Op->getOpcode(); - // Check whether opcode is a poison-generating operation + // Check whether opcode is a poison/undef-generating operation switch (Opcode) { case Instruction::Shl: case Instruction::AShr: case Instruction::LShr: { // Shifts return poison if shiftwidth is larger than 
the bitwidth. - if (auto *C = dyn_cast(I->getOperand(1))) { + if (auto *C = dyn_cast(Op->getOperand(1))) { SmallVector ShiftAmounts; if (auto *FVTy = dyn_cast(C->getType())) { unsigned NumElts = FVTy->getNumElements(); @@ -4715,41 +4714,62 @@ bool llvm::canCreatePoison(const Instruction *I) { return true; case Instruction::Call: case Instruction::CallBr: - case Instruction::Invoke: - // Function calls can return a poison value even if args are non-poison - // values. - return true; + case Instruction::Invoke: { + const auto *CB = cast(Op); + return !CB->hasRetAttr(Attribute::NoUndef); + } case Instruction::InsertElement: case Instruction::ExtractElement: { // If index exceeds the length of the vector, it returns poison - auto *VTy = cast(I->getOperand(0)->getType()); - unsigned IdxOp = I->getOpcode() == Instruction::InsertElement ? 2 : 1; - auto *Idx = dyn_cast(I->getOperand(IdxOp)); + auto *VTy = cast(Op->getOperand(0)->getType()); + unsigned IdxOp = Op->getOpcode() == Instruction::InsertElement ? 2 : 1; + auto *Idx = dyn_cast(Op->getOperand(IdxOp)); if (!Idx || Idx->getZExtValue() >= VTy->getElementCount().Min) return true; return false; } + case Instruction::ShuffleVector: { + // shufflevector may return undef. + if (PoisonOnly) + return false; + ArrayRef Mask = isa(Op) + ? 
cast(Op)->getShuffleMask() + : cast(Op)->getShuffleMask(); + return any_of(Mask, [](int Elt) { return Elt == UndefMaskElem; }); + } case Instruction::FNeg: case Instruction::PHI: case Instruction::Select: case Instruction::URem: case Instruction::SRem: - case Instruction::ShuffleVector: case Instruction::ExtractValue: case Instruction::InsertValue: case Instruction::Freeze: case Instruction::ICmp: case Instruction::FCmp: - case Instruction::GetElementPtr: return false; - default: - if (isa(I)) + case Instruction::GetElementPtr: { + const auto *GEP = cast(Op); + return GEP->isInBounds(); + } + default: { + const auto *CE = dyn_cast(Op); + if (isa(Op) || (CE && CE->isCast())) return false; - else if (isa(I)) + else if (isa(Op)) return false; // Be conservative and return true. return true; } + } +} + +bool llvm::canCreateUndefOrPoison(const Operator *Op) { + return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/false); +} + +bool llvm::canCreatePoison(const Operator *Op) { + return ::canCreateUndefOrPoison(Op, /*PoisonOnly=*/true); } bool llvm::isGuaranteedNotToBeUndefOrPoison(const Value *V, diff --git a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp index 85e096112fca1..fa97a194ea2b5 100644 --- a/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp +++ b/llvm/lib/Transforms/Instrumentation/PoisonChecking.cpp @@ -297,7 +297,7 @@ static bool rewrite(Function &F) { for (Value *V : I.operands()) Checks.push_back(getPoisonFor(ValToPoison, V)); - if (canCreatePoison(&I)) + if (canCreatePoison(cast(&I))) generateCreationChecks(I, Checks); ValToPoison[&I] = buildOrChain(B, Checks); } diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index 7dcb6204ba40f..a5d6df9818185 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -722,61 +722,71 @@ TEST(ValueTracking, propagatesPoison) { } } 
-TEST(ValueTracking, canCreatePoison) { +TEST(ValueTracking, canCreatePoisonOrUndef) { std::string AsmHead = "declare i32 @g(i32)\n" "define void @f(i32 %x, i32 %y, float %fx, float %fy, i1 %cond, " "<4 x i32> %vx, <4 x i32> %vx2, %svx, i8* %p) {\n"; std::string AsmTail = " ret void\n}"; - // (can create poison?, IR instruction) - SmallVector, 32> Data = { - {false, "add i32 %x, %y"}, - {true, "add nsw nuw i32 %x, %y"}, - {true, "shl i32 %x, %y"}, - {true, "shl <4 x i32> %vx, %vx2"}, - {true, "shl nsw i32 %x, %y"}, - {true, "shl nsw <4 x i32> %vx, "}, - {false, "shl i32 %x, 31"}, - {true, "shl i32 %x, 32"}, - {false, "shl <4 x i32> %vx, "}, - {true, "shl <4 x i32> %vx, "}, - {true, "ashr i32 %x, %y"}, - {true, "ashr exact i32 %x, %y"}, - {false, "ashr i32 %x, 31"}, - {true, "ashr exact i32 %x, 31"}, - {false, "ashr <4 x i32> %vx, "}, - {true, "ashr <4 x i32> %vx, "}, - {true, "ashr exact <4 x i32> %vx, "}, - {true, "lshr i32 %x, %y"}, - {true, "lshr exact i32 %x, 31"}, - {false, "udiv i32 %x, %y"}, - {true, "udiv exact i32 %x, %y"}, - {false, "getelementptr i8, i8* %p, i32 %x"}, - {true, "getelementptr inbounds i8, i8* %p, i32 %x"}, - {true, "fneg nnan float %fx"}, - {false, "fneg float %fx"}, - {false, "fadd float %fx, %fy"}, - {true, "fadd nnan float %fx, %fy"}, - {false, "urem i32 %x, %y"}, - {true, "fptoui float %fx to i32"}, - {true, "fptosi float %fx to i32"}, - {false, "bitcast float %fx to i32"}, - {false, "select i1 %cond, i32 %x, i32 %y"}, - {true, "select nnan i1 %cond, float %fx, float %fy"}, - {true, "extractelement <4 x i32> %vx, i32 %x"}, - {false, "extractelement <4 x i32> %vx, i32 3"}, - {true, "extractelement %svx, i32 4"}, - {true, "insertelement <4 x i32> %vx, i32 %x, i32 %y"}, - {false, "insertelement <4 x i32> %vx, i32 %x, i32 3"}, - {true, "insertelement %svx, i32 %x, i32 4"}, - {false, "freeze i32 %x"}, - {true, "call i32 @g(i32 %x)"}, - {true, "fcmp nnan oeq float %fx, %fy"}, - {false, "fcmp oeq float %fx, %fy"}}; + // (can create poison?, 
can create undef?, IR instruction) + SmallVector, 32> Data = { + {false, false, "add i32 %x, %y"}, + {true, false, "add nsw nuw i32 %x, %y"}, + {true, false, "shl i32 %x, %y"}, + {true, false, "shl <4 x i32> %vx, %vx2"}, + {true, false, "shl nsw i32 %x, %y"}, + {true, false, "shl nsw <4 x i32> %vx, "}, + {false, false, "shl i32 %x, 31"}, + {true, false, "shl i32 %x, 32"}, + {false, false, "shl <4 x i32> %vx, "}, + {true, false, "shl <4 x i32> %vx, "}, + {true, false, "ashr i32 %x, %y"}, + {true, false, "ashr exact i32 %x, %y"}, + {false, false, "ashr i32 %x, 31"}, + {true, false, "ashr exact i32 %x, 31"}, + {false, false, "ashr <4 x i32> %vx, "}, + {true, false, "ashr <4 x i32> %vx, "}, + {true, false, "ashr exact <4 x i32> %vx, "}, + {true, false, "lshr i32 %x, %y"}, + {true, false, "lshr exact i32 %x, 31"}, + {false, false, "udiv i32 %x, %y"}, + {true, false, "udiv exact i32 %x, %y"}, + {false, false, "getelementptr i8, i8* %p, i32 %x"}, + {true, false, "getelementptr inbounds i8, i8* %p, i32 %x"}, + {true, false, "fneg nnan float %fx"}, + {false, false, "fneg float %fx"}, + {false, false, "fadd float %fx, %fy"}, + {true, false, "fadd nnan float %fx, %fy"}, + {false, false, "urem i32 %x, %y"}, + {true, false, "fptoui float %fx to i32"}, + {true, false, "fptosi float %fx to i32"}, + {false, false, "bitcast float %fx to i32"}, + {false, false, "select i1 %cond, i32 %x, i32 %y"}, + {true, false, "select nnan i1 %cond, float %fx, float %fy"}, + {true, false, "extractelement <4 x i32> %vx, i32 %x"}, + {false, false, "extractelement <4 x i32> %vx, i32 3"}, + {true, false, "extractelement %svx, i32 4"}, + {true, false, "insertelement <4 x i32> %vx, i32 %x, i32 %y"}, + {false, false, "insertelement <4 x i32> %vx, i32 %x, i32 3"}, + {true, false, "insertelement %svx, i32 %x, i32 4"}, + {false, false, "freeze i32 %x"}, + {false, false, + "shufflevector <4 x i32> %vx, <4 x i32> %vx2, " + "<4 x i32> "}, + {false, true, + "shufflevector <4 x i32> %vx, <4 x i32> %vx2, " + "<4 
x i32> "}, + {false, true, + "shufflevector %svx, " + " %svx, undef"}, + {true, false, "call i32 @g(i32 %x)"}, + {false, false, "call noundef i32 @g(i32 %x)"}, + {true, false, "fcmp nnan oeq float %fx, %fy"}, + {false, false, "fcmp oeq float %fx, %fy"}}; std::string AssemblyStr = AsmHead; for (auto &Itm : Data) - AssemblyStr += Itm.second + "\n"; + AssemblyStr += std::get<2>(Itm) + "\n"; AssemblyStr += AsmTail; LLVMContext Context; @@ -793,8 +803,14 @@ TEST(ValueTracking, canCreatePoison) { for (auto &I : BB) { if (isa(&I)) break; - EXPECT_EQ(canCreatePoison(&I), Data[Index].first) - << "Incorrect answer at instruction " << Index << " = " << I; + bool Poison = std::get<0>(Data[Index]); + bool Undef = std::get<1>(Data[Index]); + EXPECT_EQ(canCreatePoison(cast(&I)), Poison) + << "Incorrect answer of canCreatePoison at instruction " << Index + << " = " << I; + EXPECT_EQ(canCreateUndefOrPoison(cast(&I)), Undef || Poison) + << "Incorrect answer of canCreateUndef at instruction " << Index + << " = " << I; Index++; } } From 19dd3712e5ae6278656003b0734e4d83c72abf12 Mon Sep 17 00:00:00 2001 From: Logan Smith Date: Sun, 19 Jul 2020 09:35:29 -0700 Subject: [PATCH 760/771] [llvm][NFC] Add missing 'override' --- llvm/tools/llvm-rc/ResourceScriptStmt.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/llvm/tools/llvm-rc/ResourceScriptStmt.h b/llvm/tools/llvm-rc/ResourceScriptStmt.h index 7076eca96a23d..b772732e78e69 100644 --- a/llvm/tools/llvm-rc/ResourceScriptStmt.h +++ b/llvm/tools/llvm-rc/ResourceScriptStmt.h @@ -289,7 +289,9 @@ class OptStatementsRCResource : public RCResource { : RCResource(Flags), OptStatements(std::make_unique(std::move(Stmts))) {} - virtual Error applyStmts(Visitor *V) const { return OptStatements->visit(V); } + Error applyStmts(Visitor *V) const override { + return OptStatements->visit(V); + } }; // LANGUAGE statement. 
It can occur both as a top-level statement (in such From 6187eeb683d8c639282d437e6af585e9b7f9c93e Mon Sep 17 00:00:00 2001 From: Alex Richardson Date: Sun, 19 Jul 2020 17:24:57 +0100 Subject: [PATCH 761/771] [llvm-reduce] Fix incorrect indices in argument reduction pass The function extractArgumentsFromModule() was passing a one-based index to, but replaceFunctionCalls() was expecting a zero-based argument index. This resulted in assertion errors when reducing function call arguments with different types. Additionally, the Reviewed By: lebedev.ri Differential Revision: https://reviews.llvm.org/D84099 --- llvm/test/Reduce/remove-args-2.ll | 20 +++++++++++++++++++ .../llvm-reduce/deltas/ReduceArguments.cpp | 7 +++---- 2 files changed, 23 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Reduce/remove-args-2.ll diff --git a/llvm/test/Reduce/remove-args-2.ll b/llvm/test/Reduce/remove-args-2.ll new file mode 100644 index 0000000000000..fddcfc75195cf --- /dev/null +++ b/llvm/test/Reduce/remove-args-2.ll @@ -0,0 +1,20 @@ +; Test that llvm-reduce can remove uninteresting function arguments from function definitions as well as their calls. 
+; This test checks that functions with different argument types are handled correctly +; +; RUN: llvm-reduce --test %python --test-arg %p/Inputs/remove-args.py %s -o %t +; RUN: cat %t | FileCheck -implicit-check-not=uninteresting %s + +%struct.foo = type { %struct.foo*, i32, i32, i8* } + +define dso_local void @bar() { +entry: + ; CHECK: call void @interesting(%struct.foo* null) + call void @interesting(i32 0, i8* null, %struct.foo* null, i8* null, i64 0) + ret void +} + +; CHECK: define internal void @interesting(%struct.foo* %interesting) { +define internal void @interesting(i32 %uninteresting1, i8* %uninteresting2, %struct.foo* %interesting, i8* %uninteresting3, i64 %uninteresting4) { +entry: + ret void +} diff --git a/llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp b/llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp index a119b40018b34..88c3e326ff97d 100644 --- a/llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp +++ b/llvm/tools/llvm-reduce/deltas/ReduceArguments.cpp @@ -81,10 +81,9 @@ static void extractArgumentsFromModule(std::vector ChunksToKeep, continue; std::set ArgIndexesToKeep; - int ArgI = 0; - for (auto &Arg : F->args()) - if (ArgsToKeep.count(&Arg)) - ArgIndexesToKeep.insert(++ArgI); + for (auto &Arg : enumerate(F->args())) + if (ArgsToKeep.count(&Arg.value())) + ArgIndexesToKeep.insert(Arg.index()); auto *ClonedFunc = CloneFunction(F, VMap); // In order to preserve function order, we move Clone after old Function From 8b354cc8db413f596c95b4f3240fabaa3e2c931e Mon Sep 17 00:00:00 2001 From: Jameson Nash Date: Wed, 15 Jul 2020 09:17:42 -0400 Subject: [PATCH 762/771] [ConstantFolding] check applicability of AllOnes constant creation first The getAllOnesValue can only handle things that are bitcast from a ConstantInt, while here we bitcast through a pointer, so we may see more complex objects (like Array or Struct). 
Differential Revision: https://reviews.llvm.org/D83870 --- llvm/lib/Analysis/ConstantFolding.cpp | 8 +++- llvm/test/Analysis/ConstantFolding/allones.ll | 46 +++++++++++++++++++ 2 files changed, 52 insertions(+), 2 deletions(-) create mode 100644 llvm/test/Analysis/ConstantFolding/allones.ll diff --git a/llvm/lib/Analysis/ConstantFolding.cpp b/llvm/lib/Analysis/ConstantFolding.cpp index 8c66decaaf58d..6feffcbb98e1f 100644 --- a/llvm/lib/Analysis/ConstantFolding.cpp +++ b/llvm/lib/Analysis/ConstantFolding.cpp @@ -342,8 +342,12 @@ Constant *llvm::ConstantFoldLoadThroughBitcast(Constant *C, Type *DestTy, // pointers legally). if (C->isNullValue() && !DestTy->isX86_MMXTy()) return Constant::getNullValue(DestTy); - if (C->isAllOnesValue() && !DestTy->isX86_MMXTy() && - !DestTy->isPtrOrPtrVectorTy()) // Don't get ones for ptr types! + if (C->isAllOnesValue() && + (DestTy->isIntegerTy() || DestTy->isFloatingPointTy() || + DestTy->isVectorTy()) && + !DestTy->isX86_MMXTy() && !DestTy->isPtrOrPtrVectorTy()) + // Get ones when the input is trivial, but + // only for supported types inside getAllOnesValue. 
return Constant::getAllOnesValue(DestTy); // If the type sizes are the same and a cast is legal, just directly diff --git a/llvm/test/Analysis/ConstantFolding/allones.ll b/llvm/test/Analysis/ConstantFolding/allones.ll new file mode 100644 index 0000000000000..1315b3628475e --- /dev/null +++ b/llvm/test/Analysis/ConstantFolding/allones.ll @@ -0,0 +1,46 @@ +; RUN: opt -early-cse -S -o - %s | FileCheck %s +target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64-ni:2" +target triple = "armv7-unknown-linux-gnueabi" + +%struct.anon = type { i32 } + +@onesstruct = private constant %struct.anon { i32 -1 }, align 4 + +define i32 @allones_struct() { +; CHECK-LABEL: @allones_struct() +; CHECK-NEXT: %1 = load [1 x i32], [1 x i32]* bitcast (%struct.anon* @onesstruct to [1 x i32]*), align 4 +; CHECK-NEXT: %2 = extractvalue [1 x i32] %1, 0 +; CHECK-NEXT: ret i32 %2 + %1 = load [1 x i32], [1 x i32]* bitcast (%struct.anon* @onesstruct to [1 x i32]*), align 4 + %2 = extractvalue [1 x i32] %1, 0 + ret i32 %2 +} + +define i32 @allones_int() { +; CHECK-LABEL: @allones_int() +; CHECK-NEXT: ret i32 -1 + %1 = load i32, i32* bitcast (%struct.anon* @onesstruct to i32*), align 4 + ret i32 %1 +} + +define i32* @allones_ptr() { +; CHECK-LABEL: @allones_ptr() +; CHECK-NEXT: ret i32* inttoptr (i32 -1 to i32*) + %1 = load i32*, i32** bitcast (%struct.anon* @onesstruct to i32**), align 4 + ret i32* %1 +} + +define i32 addrspace(1)* @allones_ptr1() { +; CHECK-LABEL: @allones_ptr1() +; CHECK-NEXT: ret i32 addrspace(1)* inttoptr (i32 -1 to i32 addrspace(1)*) + %1 = load i32 addrspace(1)*, i32 addrspace(1)** bitcast (%struct.anon* @onesstruct to i32 addrspace(1)**), align 4 + ret i32 addrspace(1)* %1 +} + +define i32 addrspace(2)* @allones_ptr2() { +; CHECK-LABEL: @allones_ptr2() +; CHECK-NEXT: %1 = load i32 addrspace(2)*, i32 addrspace(2)** bitcast (%struct.anon* @onesstruct to i32 addrspace(2)**), align 4 +; CHECK-NEXT: ret i32 addrspace(2)* %1 + %1 = load i32 addrspace(2)*, i32 
addrspace(2)** bitcast (%struct.anon* @onesstruct to i32 addrspace(2)**), align 4 + ret i32 addrspace(2)* %1 +} From ef66e3d086308800d7947a385c2ae09d3f55a695 Mon Sep 17 00:00:00 2001 From: Juneyoung Lee Date: Mon, 20 Jul 2020 02:21:52 +0900 Subject: [PATCH 763/771] Fix ValueTrackingTest.cpp to use pair instead of tuple --- llvm/unittests/Analysis/ValueTrackingTest.cpp | 102 +++++++++--------- 1 file changed, 51 insertions(+), 51 deletions(-) diff --git a/llvm/unittests/Analysis/ValueTrackingTest.cpp b/llvm/unittests/Analysis/ValueTrackingTest.cpp index a5d6df9818185..75ebe9cbf8aa0 100644 --- a/llvm/unittests/Analysis/ValueTrackingTest.cpp +++ b/llvm/unittests/Analysis/ValueTrackingTest.cpp @@ -729,64 +729,64 @@ TEST(ValueTracking, canCreatePoisonOrUndef) { "<4 x i32> %vx, <4 x i32> %vx2, %svx, i8* %p) {\n"; std::string AsmTail = " ret void\n}"; // (can create poison?, can create undef?, IR instruction) - SmallVector, 32> Data = { - {false, false, "add i32 %x, %y"}, - {true, false, "add nsw nuw i32 %x, %y"}, - {true, false, "shl i32 %x, %y"}, - {true, false, "shl <4 x i32> %vx, %vx2"}, - {true, false, "shl nsw i32 %x, %y"}, - {true, false, "shl nsw <4 x i32> %vx, "}, - {false, false, "shl i32 %x, 31"}, - {true, false, "shl i32 %x, 32"}, - {false, false, "shl <4 x i32> %vx, "}, - {true, false, "shl <4 x i32> %vx, "}, - {true, false, "ashr i32 %x, %y"}, - {true, false, "ashr exact i32 %x, %y"}, - {false, false, "ashr i32 %x, 31"}, - {true, false, "ashr exact i32 %x, 31"}, - {false, false, "ashr <4 x i32> %vx, "}, - {true, false, "ashr <4 x i32> %vx, "}, - {true, false, "ashr exact <4 x i32> %vx, "}, - {true, false, "lshr i32 %x, %y"}, - {true, false, "lshr exact i32 %x, 31"}, - {false, false, "udiv i32 %x, %y"}, - {true, false, "udiv exact i32 %x, %y"}, - {false, false, "getelementptr i8, i8* %p, i32 %x"}, - {true, false, "getelementptr inbounds i8, i8* %p, i32 %x"}, - {true, false, "fneg nnan float %fx"}, - {false, false, "fneg float %fx"}, - {false, false, "fadd 
float %fx, %fy"}, - {true, false, "fadd nnan float %fx, %fy"}, - {false, false, "urem i32 %x, %y"}, - {true, false, "fptoui float %fx to i32"}, - {true, false, "fptosi float %fx to i32"}, - {false, false, "bitcast float %fx to i32"}, - {false, false, "select i1 %cond, i32 %x, i32 %y"}, - {true, false, "select nnan i1 %cond, float %fx, float %fy"}, - {true, false, "extractelement <4 x i32> %vx, i32 %x"}, - {false, false, "extractelement <4 x i32> %vx, i32 3"}, - {true, false, "extractelement %svx, i32 4"}, - {true, false, "insertelement <4 x i32> %vx, i32 %x, i32 %y"}, - {false, false, "insertelement <4 x i32> %vx, i32 %x, i32 3"}, - {true, false, "insertelement %svx, i32 %x, i32 4"}, - {false, false, "freeze i32 %x"}, - {false, false, + SmallVector, std::string>, 32> Data = { + {{false, false}, "add i32 %x, %y"}, + {{true, false}, "add nsw nuw i32 %x, %y"}, + {{true, false}, "shl i32 %x, %y"}, + {{true, false}, "shl <4 x i32> %vx, %vx2"}, + {{true, false}, "shl nsw i32 %x, %y"}, + {{true, false}, "shl nsw <4 x i32> %vx, "}, + {{false, false}, "shl i32 %x, 31"}, + {{true, false}, "shl i32 %x, 32"}, + {{false, false}, "shl <4 x i32> %vx, "}, + {{true, false}, "shl <4 x i32> %vx, "}, + {{true, false}, "ashr i32 %x, %y"}, + {{true, false}, "ashr exact i32 %x, %y"}, + {{false, false}, "ashr i32 %x, 31"}, + {{true, false}, "ashr exact i32 %x, 31"}, + {{false, false}, "ashr <4 x i32> %vx, "}, + {{true, false}, "ashr <4 x i32> %vx, "}, + {{true, false}, "ashr exact <4 x i32> %vx, "}, + {{true, false}, "lshr i32 %x, %y"}, + {{true, false}, "lshr exact i32 %x, 31"}, + {{false, false}, "udiv i32 %x, %y"}, + {{true, false}, "udiv exact i32 %x, %y"}, + {{false, false}, "getelementptr i8, i8* %p, i32 %x"}, + {{true, false}, "getelementptr inbounds i8, i8* %p, i32 %x"}, + {{true, false}, "fneg nnan float %fx"}, + {{false, false}, "fneg float %fx"}, + {{false, false}, "fadd float %fx, %fy"}, + {{true, false}, "fadd nnan float %fx, %fy"}, + {{false, false}, "urem i32 %x, %y"}, + 
{{true, false}, "fptoui float %fx to i32"}, + {{true, false}, "fptosi float %fx to i32"}, + {{false, false}, "bitcast float %fx to i32"}, + {{false, false}, "select i1 %cond, i32 %x, i32 %y"}, + {{true, false}, "select nnan i1 %cond, float %fx, float %fy"}, + {{true, false}, "extractelement <4 x i32> %vx, i32 %x"}, + {{false, false}, "extractelement <4 x i32> %vx, i32 3"}, + {{true, false}, "extractelement %svx, i32 4"}, + {{true, false}, "insertelement <4 x i32> %vx, i32 %x, i32 %y"}, + {{false, false}, "insertelement <4 x i32> %vx, i32 %x, i32 3"}, + {{true, false}, "insertelement %svx, i32 %x, i32 4"}, + {{false, false}, "freeze i32 %x"}, + {{false, false}, "shufflevector <4 x i32> %vx, <4 x i32> %vx2, " "<4 x i32> "}, - {false, true, + {{false, true}, "shufflevector <4 x i32> %vx, <4 x i32> %vx2, " "<4 x i32> "}, - {false, true, + {{false, true}, "shufflevector %svx, " " %svx, undef"}, - {true, false, "call i32 @g(i32 %x)"}, - {false, false, "call noundef i32 @g(i32 %x)"}, - {true, false, "fcmp nnan oeq float %fx, %fy"}, - {false, false, "fcmp oeq float %fx, %fy"}}; + {{true, false}, "call i32 @g(i32 %x)"}, + {{false, false}, "call noundef i32 @g(i32 %x)"}, + {{true, false}, "fcmp nnan oeq float %fx, %fy"}, + {{false, false}, "fcmp oeq float %fx, %fy"}}; std::string AssemblyStr = AsmHead; for (auto &Itm : Data) - AssemblyStr += std::get<2>(Itm) + "\n"; + AssemblyStr += Itm.second + "\n"; AssemblyStr += AsmTail; LLVMContext Context; @@ -803,8 +803,8 @@ TEST(ValueTracking, canCreatePoisonOrUndef) { for (auto &I : BB) { if (isa(&I)) break; - bool Poison = std::get<0>(Data[Index]); - bool Undef = std::get<1>(Data[Index]); + bool Poison = Data[Index].first.first; + bool Undef = Data[Index].first.second; EXPECT_EQ(canCreatePoison(cast(&I)), Poison) << "Incorrect answer of canCreatePoison at instruction " << Index << " = " << I; From f7a571537a14da83e5701864471b99b61cdcca54 Mon Sep 17 00:00:00 2001 From: Lang Hames Date: Sat, 18 Jul 2020 20:43:13 -0700 Subject: [PATCH 
764/771] [JITLink][MachO] Fix handling of non-extern UNSIGNED pair of SUBTRACTOR relocs. When processing a MachO SUBTRACTOR/UNSIGNED pair, if the UNSIGNED target is non-extern then check the r_symbolnum field of the relocation to find the targeted section and use the section's address to find 'ToSymbol'. Previously 'ToSymbol' was found by loading the initial value stored at the fixup location and treating this as an address to search for. This is incorrect, however: the initial value includes the addend and will point to the wrong block if the addend is less than zero or greater than the block size. rdar://65756694 --- llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp | 9 +++++---- llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp | 9 +++++---- .../JITLink/X86/MachO_x86-64_relocations.s | 8 ++++---- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp index 463845a5b8cbd..28adf9b3fb718 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_arm64.cpp @@ -148,10 +148,11 @@ class MachOLinkGraphBuilder_arm64 : public MachOLinkGraphBuilder { else return ToSymbolOrErr.takeError(); } else { - if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue)) - ToSymbol = &*ToSymbolOrErr; - else - return ToSymbolOrErr.takeError(); + auto ToSymbolSec = findSectionByIndex(UnsignedRI.r_symbolnum - 1); + if (!ToSymbolSec) + return ToSymbolSec.takeError(); + ToSymbol = getSymbolByAddress(ToSymbolSec->Address); + assert(ToSymbol && "No symbol for section"); FixupValue -= ToSymbol->getAddress(); } diff --git a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp index a91bc3b6033cf..54d725eac144a 100644 --- a/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp +++ b/llvm/lib/ExecutionEngine/JITLink/MachO_x86_64.cpp @@ -150,10 +150,11 @@ class MachOLinkGraphBuilder_x86_64 : public 
MachOLinkGraphBuilder { else return ToSymbolOrErr.takeError(); } else { - if (auto ToSymbolOrErr = findSymbolByAddress(FixupValue)) - ToSymbol = &*ToSymbolOrErr; - else - return ToSymbolOrErr.takeError(); + auto ToSymbolSec = findSectionByIndex(UnsignedRI.r_symbolnum - 1); + if (!ToSymbolSec) + return ToSymbolSec.takeError(); + ToSymbol = getSymbolByAddress(ToSymbolSec->Address); + assert(ToSymbol && "No symbol for section"); FixupValue -= ToSymbol->getAddress(); } diff --git a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s index 6d9c26484f8fd..dd3cc455bd349 100644 --- a/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s +++ b/llvm/test/ExecutionEngine/JITLink/X86/MachO_x86-64_relocations.s @@ -193,19 +193,19 @@ anon_func_addr_quad: # X86_64_RELOC_SUBTRACTOR Quad/Long in named storage with anonymous minuend # -# jitlink-check: *{8}anon_minuend_quad1 = section_addr(macho_reloc.o, __data) - anon_minuend_quad1 + 2 +# jitlink-check: *{8}anon_minuend_quad1 = section_addr(macho_reloc.o, __data) - anon_minuend_quad1 - 2 # Only the form "B: .quad LA - B + C" is tested. The form "B: .quad B - LA + C" is # invalid because the subtrahend can not be local. .globl anon_minuend_quad1 .p2align 3 anon_minuend_quad1: - .quad Lanon_data - anon_minuend_quad1 + 2 + .quad Lanon_data - anon_minuend_quad1 - 2 -# jitlink-check: *{4}anon_minuend_long1 = (section_addr(macho_reloc.o, __data) - anon_minuend_long1 + 2)[31:0] +# jitlink-check: *{4}anon_minuend_long1 = (section_addr(macho_reloc.o, __data) - anon_minuend_long1 - 2)[31:0] .globl anon_minuend_long1 .p2align 2 anon_minuend_long1: - .long Lanon_data - anon_minuend_long1 + 2 + .long Lanon_data - anon_minuend_long1 - 2 # Check X86_64_RELOC_SUBTRACTOR Quad/Long in named storage with minuend and subtrahend. # Both forms "A: .quad A - B + C" and "A: .quad B - A + C" are tested. 
From fcc607a0849acafad7c655cd6932422027b99c79 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Fri, 10 Jul 2020 10:49:14 +0100 Subject: [PATCH 765/771] Move SPIRVMap initializations out of OCLUtil.h The map initialization unnecessarily ends up in almost every translation unit due to transitive inclusion. --- llvm-spirv/lib/SPIRV/OCLUtil.cpp | 537 +++++++++++++++++++++++++++++++ llvm-spirv/lib/SPIRV/OCLUtil.h | 525 ------------------------------ 2 files changed, 537 insertions(+), 525 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.cpp b/llvm-spirv/lib/SPIRV/OCLUtil.cpp index eff0cda426198..cb230ce01f528 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.cpp +++ b/llvm-spirv/lib/SPIRV/OCLUtil.cpp @@ -96,11 +96,548 @@ namespace OCLUtil { #define SPIRV_IMAGE_ADDR_SPACE SPIRAS_Global #endif +} // namespace OCLUtil + +/////////////////////////////////////////////////////////////////////////////// +// +// Map definitions +// +/////////////////////////////////////////////////////////////////////////////// + +using namespace OCLUtil; +namespace SPIRV { + +template <> void SPIRVMap::init() { + add(OCLMF_Local, MemorySemanticsWorkgroupMemoryMask); + add(OCLMF_Global, MemorySemanticsCrossWorkgroupMemoryMask); + add(OCLMF_Image, MemorySemanticsImageMemoryMask); +} + +template <> +void SPIRVMap::init() { + add(OCLMFEx_Local, MemorySemanticsWorkgroupMemoryMask); + add(OCLMFEx_Global, MemorySemanticsCrossWorkgroupMemoryMask); + add(OCLMFEx_Local_Global, MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask); + add(OCLMFEx_Image, MemorySemanticsImageMemoryMask); + add(OCLMFEx_Image_Local, + MemorySemanticsWorkgroupMemoryMask | MemorySemanticsImageMemoryMask); + add(OCLMFEx_Image_Global, + MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsImageMemoryMask); + add(OCLMFEx_Image_Local_Global, MemorySemanticsWorkgroupMemoryMask | + MemorySemanticsCrossWorkgroupMemoryMask | + MemorySemanticsImageMemoryMask); +} + +template <> +void SPIRVMap::init() { 
+ add(OCLMO_relaxed, MemorySemanticsMaskNone); + add(OCLMO_acquire, MemorySemanticsAcquireMask); + add(OCLMO_release, MemorySemanticsReleaseMask); + add(OCLMO_acq_rel, MemorySemanticsAcquireReleaseMask); + add(OCLMO_seq_cst, MemorySemanticsSequentiallyConsistentMask); +} + +template <> void SPIRVMap::init() { + add(OCLMS_work_item, ScopeInvocation); + add(OCLMS_work_group, ScopeWorkgroup); + add(OCLMS_device, ScopeDevice); + add(OCLMS_all_svm_devices, ScopeCrossDevice); + add(OCLMS_sub_group, ScopeSubgroup); +} + +template <> void SPIRVMap::init() { + add("reduce", GroupOperationReduce); + add("scan_inclusive", GroupOperationInclusiveScan); + add("scan_exclusive", GroupOperationExclusiveScan); + add("ballot_bit_count", GroupOperationReduce); + add("ballot_inclusive_scan", GroupOperationInclusiveScan); + add("ballot_exclusive_scan", GroupOperationExclusiveScan); + add("non_uniform_reduce", GroupOperationReduce); + add("non_uniform_scan_inclusive", GroupOperationInclusiveScan); + add("non_uniform_scan_exclusive", GroupOperationExclusiveScan); + add("non_uniform_reduce_logical", GroupOperationReduce); + add("non_uniform_scan_inclusive_logical", GroupOperationInclusiveScan); + add("non_uniform_scan_exclusive_logical", GroupOperationExclusiveScan); + add("clustered_reduce", GroupOperationClusteredReduce); +} + +template <> void SPIRVMap::init() { + add("rte", FPRoundingModeRTE); + add("rtz", FPRoundingModeRTZ); + add("rtp", FPRoundingModeRTP); + add("rtn", FPRoundingModeRTN); +} + +template <> void SPIRVMap::init() { +#define _SPIRV_OP(x) add(OclExt::x, #x); + _SPIRV_OP(cl_images) + _SPIRV_OP(cl_doubles) + _SPIRV_OP(cl_khr_int64_base_atomics) + _SPIRV_OP(cl_khr_int64_extended_atomics) + _SPIRV_OP(cl_khr_fp16) + _SPIRV_OP(cl_khr_gl_sharing) + _SPIRV_OP(cl_khr_gl_event) + _SPIRV_OP(cl_khr_d3d10_sharing) + _SPIRV_OP(cl_khr_media_sharing) + _SPIRV_OP(cl_khr_d3d11_sharing) + _SPIRV_OP(cl_khr_global_int32_base_atomics) + _SPIRV_OP(cl_khr_global_int32_extended_atomics) + 
_SPIRV_OP(cl_khr_local_int32_base_atomics) + _SPIRV_OP(cl_khr_local_int32_extended_atomics) + _SPIRV_OP(cl_khr_byte_addressable_store) + _SPIRV_OP(cl_khr_3d_image_writes) + _SPIRV_OP(cl_khr_gl_msaa_sharing) + _SPIRV_OP(cl_khr_depth_images) + _SPIRV_OP(cl_khr_gl_depth_images) + _SPIRV_OP(cl_khr_subgroups) + _SPIRV_OP(cl_khr_mipmap_image) + _SPIRV_OP(cl_khr_mipmap_image_writes) + _SPIRV_OP(cl_khr_egl_event) + _SPIRV_OP(cl_khr_srgb_image_writes) +#undef _SPIRV_OP +} + +template <> void SPIRVMap::init() { + add(OclExt::cl_images, CapabilityImageBasic); + add(OclExt::cl_doubles, CapabilityFloat64); + add(OclExt::cl_khr_int64_base_atomics, CapabilityInt64Atomics); + add(OclExt::cl_khr_int64_extended_atomics, CapabilityInt64Atomics); + add(OclExt::cl_khr_fp16, CapabilityFloat16); + add(OclExt::cl_khr_subgroups, CapabilityGroups); + add(OclExt::cl_khr_mipmap_image, CapabilityImageMipmap); + add(OclExt::cl_khr_mipmap_image_writes, CapabilityImageMipmap); +} + +/// Map OpenCL work functions to SPIR-V builtin variables. 
+template <> void SPIRVMap::init() { + add("get_work_dim", BuiltInWorkDim); + add("get_global_size", BuiltInGlobalSize); + add("get_global_id", BuiltInGlobalInvocationId); + add("get_global_offset", BuiltInGlobalOffset); + add("get_local_size", BuiltInWorkgroupSize); + add("get_enqueued_local_size", BuiltInEnqueuedWorkgroupSize); + add("get_local_id", BuiltInLocalInvocationId); + add("get_num_groups", BuiltInNumWorkgroups); + add("get_group_id", BuiltInWorkgroupId); + add("get_global_linear_id", BuiltInGlobalLinearId); + add("get_local_linear_id", BuiltInLocalInvocationIndex); + // cl_khr_subgroups + add("get_sub_group_size", BuiltInSubgroupSize); + add("get_max_sub_group_size", BuiltInSubgroupMaxSize); + add("get_num_sub_groups", BuiltInNumSubgroups); + add("get_enqueued_num_sub_groups", BuiltInNumEnqueuedSubgroups); + add("get_sub_group_id", BuiltInSubgroupId); + add("get_sub_group_local_id", BuiltInSubgroupLocalInvocationId); + // cl_khr_subgroup_ballot + add("get_sub_group_eq_mask", BuiltInSubgroupEqMask); + add("get_sub_group_ge_mask", BuiltInSubgroupGeMask); + add("get_sub_group_gt_mask", BuiltInSubgroupGtMask); + add("get_sub_group_le_mask", BuiltInSubgroupLeMask); + add("get_sub_group_lt_mask", BuiltInSubgroupLtMask); +} + +// Maps uniqued OCL builtin function name to SPIR-V op code. +// A uniqued OCL builtin function name may be different from the real +// OCL builtin function name. e.g. instead of atomic_min, atomic_umin +// is used for atomic_min with unsigned integer parameter. +// work_group_ and sub_group_ functions are unified as group_ functions +// except work_group_barrier. 
+class SPIRVInstruction; +template <> void SPIRVMap::init() { +#define _SPIRV_OP(x, y) add("atom_" #x, OpAtomic##y); + // cl_khr_int64_base_atomics builtins + _SPIRV_OP(add, IAdd) + _SPIRV_OP(sub, ISub) + _SPIRV_OP(xchg, Exchange) + _SPIRV_OP(dec, IDecrement) + _SPIRV_OP(inc, IIncrement) + _SPIRV_OP(cmpxchg, CompareExchange) + // cl_khr_int64_extended_atomics builtins + _SPIRV_OP(min, SMin) + _SPIRV_OP(max, SMax) + _SPIRV_OP(and, And) + _SPIRV_OP(or, Or) + _SPIRV_OP(xor, Xor) +#undef _SPIRV_OP +#define _SPIRV_OP(x, y) add("atomic_" #x, Op##y); + // CL 2.0 atomic builtins + _SPIRV_OP(flag_test_and_set_explicit, AtomicFlagTestAndSet) + _SPIRV_OP(flag_clear_explicit, AtomicFlagClear) + _SPIRV_OP(load_explicit, AtomicLoad) + _SPIRV_OP(store_explicit, AtomicStore) + _SPIRV_OP(exchange_explicit, AtomicExchange) + _SPIRV_OP(compare_exchange_strong_explicit, AtomicCompareExchange) + _SPIRV_OP(compare_exchange_weak_explicit, AtomicCompareExchangeWeak) + _SPIRV_OP(inc, AtomicIIncrement) + _SPIRV_OP(dec, AtomicIDecrement) + _SPIRV_OP(fetch_add_explicit, AtomicIAdd) + _SPIRV_OP(fetch_sub_explicit, AtomicISub) + _SPIRV_OP(fetch_umin_explicit, AtomicUMin) + _SPIRV_OP(fetch_umax_explicit, AtomicUMax) + _SPIRV_OP(fetch_min_explicit, AtomicSMin) + _SPIRV_OP(fetch_max_explicit, AtomicSMax) + _SPIRV_OP(fetch_and_explicit, AtomicAnd) + _SPIRV_OP(fetch_or_explicit, AtomicOr) + _SPIRV_OP(fetch_xor_explicit, AtomicXor) +#undef _SPIRV_OP +#define _SPIRV_OP(x, y) add(#x, Op##y); + _SPIRV_OP(dot, Dot) + _SPIRV_OP(async_work_group_copy, GroupAsyncCopy) + _SPIRV_OP(async_work_group_strided_copy, GroupAsyncCopy) + _SPIRV_OP(wait_group_events, GroupWaitEvents) + _SPIRV_OP(isequal, FOrdEqual) + _SPIRV_OP(isnotequal, FUnordNotEqual) + _SPIRV_OP(isgreater, FOrdGreaterThan) + _SPIRV_OP(isgreaterequal, FOrdGreaterThanEqual) + _SPIRV_OP(isless, FOrdLessThan) + _SPIRV_OP(islessequal, FOrdLessThanEqual) + _SPIRV_OP(islessgreater, LessOrGreater) + _SPIRV_OP(isordered, Ordered) + _SPIRV_OP(isunordered, 
Unordered) + _SPIRV_OP(isfinite, IsFinite) + _SPIRV_OP(isinf, IsInf) + _SPIRV_OP(isnan, IsNan) + _SPIRV_OP(isnormal, IsNormal) + _SPIRV_OP(signbit, SignBitSet) + _SPIRV_OP(any, Any) + _SPIRV_OP(all, All) + _SPIRV_OP(popcount, BitCount) + _SPIRV_OP(get_fence, GenericPtrMemSemantics) + // CL 2.0 kernel enqueue builtins + _SPIRV_OP(enqueue_marker, EnqueueMarker) + _SPIRV_OP(enqueue_kernel, EnqueueKernel) + _SPIRV_OP(get_kernel_sub_group_count_for_ndrange_impl, + GetKernelNDrangeSubGroupCount) + _SPIRV_OP(get_kernel_max_sub_group_size_for_ndrange_impl, + GetKernelNDrangeMaxSubGroupSize) + _SPIRV_OP(get_kernel_work_group_size_impl, GetKernelWorkGroupSize) + _SPIRV_OP(get_kernel_preferred_work_group_size_multiple_impl, + GetKernelPreferredWorkGroupSizeMultiple) + _SPIRV_OP(retain_event, RetainEvent) + _SPIRV_OP(release_event, ReleaseEvent) + _SPIRV_OP(create_user_event, CreateUserEvent) + _SPIRV_OP(is_valid_event, IsValidEvent) + _SPIRV_OP(set_user_event_status, SetUserEventStatus) + _SPIRV_OP(capture_event_profiling_info, CaptureEventProfilingInfo) + _SPIRV_OP(get_default_queue, GetDefaultQueue) + _SPIRV_OP(ndrange_1D, BuildNDRange) + _SPIRV_OP(ndrange_2D, BuildNDRange) + _SPIRV_OP(ndrange_3D, BuildNDRange) + // Generic Address Space Casts + _SPIRV_OP(to_global, GenericCastToPtrExplicit) + _SPIRV_OP(to_local, GenericCastToPtrExplicit) + _SPIRV_OP(to_private, GenericCastToPtrExplicit) + // CL 2.0 pipe builtins + _SPIRV_OP(read_pipe_2, ReadPipe) + _SPIRV_OP(write_pipe_2, WritePipe) + _SPIRV_OP(read_pipe_2_bl, ReadPipeBlockingINTEL) + _SPIRV_OP(write_pipe_2_bl, WritePipeBlockingINTEL) + _SPIRV_OP(read_pipe_4, ReservedReadPipe) + _SPIRV_OP(write_pipe_4, ReservedWritePipe) + _SPIRV_OP(reserve_read_pipe, ReserveReadPipePackets) + _SPIRV_OP(reserve_write_pipe, ReserveWritePipePackets) + _SPIRV_OP(commit_read_pipe, CommitReadPipe) + _SPIRV_OP(commit_write_pipe, CommitWritePipe) + _SPIRV_OP(is_valid_reserve_id, IsValidReserveId) + _SPIRV_OP(group_reserve_read_pipe, 
GroupReserveReadPipePackets) + _SPIRV_OP(group_reserve_write_pipe, GroupReserveWritePipePackets) + _SPIRV_OP(group_commit_read_pipe, GroupCommitReadPipe) + _SPIRV_OP(group_commit_write_pipe, GroupCommitWritePipe) + _SPIRV_OP(get_pipe_num_packets_ro, GetNumPipePackets) + _SPIRV_OP(get_pipe_num_packets_wo, GetNumPipePackets) + _SPIRV_OP(get_pipe_max_packets_ro, GetMaxPipePackets) + _SPIRV_OP(get_pipe_max_packets_wo, GetMaxPipePackets) + // CL 2.0 workgroup builtins + _SPIRV_OP(group_all, GroupAll) + _SPIRV_OP(group_any, GroupAny) + _SPIRV_OP(group_broadcast, GroupBroadcast) + _SPIRV_OP(group_iadd, GroupIAdd) + _SPIRV_OP(group_fadd, GroupFAdd) + _SPIRV_OP(group_fmin, GroupFMin) + _SPIRV_OP(group_umin, GroupUMin) + _SPIRV_OP(group_smin, GroupSMin) + _SPIRV_OP(group_fmax, GroupFMax) + _SPIRV_OP(group_umax, GroupUMax) + _SPIRV_OP(group_smax, GroupSMax) + // CL image builtins + _SPIRV_OP(SampledImage, SampledImage) + _SPIRV_OP(ImageSampleExplicitLod, ImageSampleExplicitLod) + _SPIRV_OP(read_image, ImageRead) + _SPIRV_OP(write_image, ImageWrite) + _SPIRV_OP(get_image_channel_data_type, ImageQueryFormat) + _SPIRV_OP(get_image_channel_order, ImageQueryOrder) + _SPIRV_OP(get_image_num_mip_levels, ImageQueryLevels) + _SPIRV_OP(get_image_num_samples, ImageQuerySamples) + // Intel Subgroups builtins + _SPIRV_OP(intel_sub_group_shuffle, SubgroupShuffleINTEL) + _SPIRV_OP(intel_sub_group_shuffle_down, SubgroupShuffleDownINTEL) + _SPIRV_OP(intel_sub_group_shuffle_up, SubgroupShuffleUpINTEL) + _SPIRV_OP(intel_sub_group_shuffle_xor, SubgroupShuffleXorINTEL) + // Intel media_block_io builtins + _SPIRV_OP(intel_sub_group_media_block_read, SubgroupImageMediaBlockReadINTEL) + _SPIRV_OP(intel_sub_group_media_block_write, + SubgroupImageMediaBlockWriteINTEL) + // cl_khr_subgroup_non_uniform_vote + _SPIRV_OP(group_elect, GroupNonUniformElect) + _SPIRV_OP(group_non_uniform_all, GroupNonUniformAll) + _SPIRV_OP(group_non_uniform_any, GroupNonUniformAny) + _SPIRV_OP(group_non_uniform_all_equal, 
GroupNonUniformAllEqual) + // cl_khr_subgroup_ballot + _SPIRV_OP(group_non_uniform_broadcast, GroupNonUniformBroadcast) + _SPIRV_OP(group_broadcast_first, GroupNonUniformBroadcastFirst) + _SPIRV_OP(group_ballot, GroupNonUniformBallot) + _SPIRV_OP(group_inverse_ballot, GroupNonUniformInverseBallot) + _SPIRV_OP(group_ballot_bit_extract, GroupNonUniformBallotBitExtract) + _SPIRV_OP(group_ballot_bit_count_iadd, GroupNonUniformBallotBitCount) + _SPIRV_OP(group_ballot_find_lsb, GroupNonUniformBallotFindLSB) + _SPIRV_OP(group_ballot_find_msb, GroupNonUniformBallotFindMSB) + // cl_khr_subgroup_non_uniform_arithmetic + _SPIRV_OP(group_non_uniform_iadd, GroupNonUniformIAdd) + _SPIRV_OP(group_non_uniform_fadd, GroupNonUniformFAdd) + _SPIRV_OP(group_non_uniform_imul, GroupNonUniformIMul) + _SPIRV_OP(group_non_uniform_fmul, GroupNonUniformFMul) + _SPIRV_OP(group_non_uniform_smin, GroupNonUniformSMin) + _SPIRV_OP(group_non_uniform_umin, GroupNonUniformUMin) + _SPIRV_OP(group_non_uniform_fmin, GroupNonUniformFMin) + _SPIRV_OP(group_non_uniform_smax, GroupNonUniformSMax) + _SPIRV_OP(group_non_uniform_umax, GroupNonUniformUMax) + _SPIRV_OP(group_non_uniform_fmax, GroupNonUniformFMax) + _SPIRV_OP(group_non_uniform_iand, GroupNonUniformBitwiseAnd) + _SPIRV_OP(group_non_uniform_ior, GroupNonUniformBitwiseOr) + _SPIRV_OP(group_non_uniform_ixor, GroupNonUniformBitwiseXor) + _SPIRV_OP(group_non_uniform_logical_iand, GroupNonUniformLogicalAnd) + _SPIRV_OP(group_non_uniform_logical_ior, GroupNonUniformLogicalOr) + _SPIRV_OP(group_non_uniform_logical_ixor, GroupNonUniformLogicalXor) + // cl_khr_subgroup_shuffle + _SPIRV_OP(group_shuffle, GroupNonUniformShuffle) + _SPIRV_OP(group_shuffle_xor, GroupNonUniformShuffleXor) + // cl_khr_subgroup_shuffle_relative + _SPIRV_OP(group_shuffle_up, GroupNonUniformShuffleUp) + _SPIRV_OP(group_shuffle_down, GroupNonUniformShuffleDown) +#undef _SPIRV_OP +} + +template <> void SPIRVMap::init() { +#define _SPIRV_OP(x, y) add(#x, Op##y); + _SPIRV_OP(add, 
AtomicIAdd) + _SPIRV_OP(sub, AtomicISub) + _SPIRV_OP(xchg, AtomicExchange) + _SPIRV_OP(cmpxchg, AtomicCompareExchange) + _SPIRV_OP(inc, AtomicIIncrement) + _SPIRV_OP(dec, AtomicIDecrement) + _SPIRV_OP(min, AtomicSMin) + _SPIRV_OP(max, AtomicSMax) + _SPIRV_OP(umin, AtomicUMin) + _SPIRV_OP(umax, AtomicUMax) + _SPIRV_OP(and, AtomicAnd) + _SPIRV_OP(or, AtomicOr) + _SPIRV_OP(xor, AtomicXor) +#undef _SPIRV_OP +} + +// SPV_INTEL_device_side_avc_motion_estimation extension builtins +class SPIRVSubgroupsAVCIntelInst; +template <> void SPIRVMap::init() { + // Here is a workaround for a bug in the specification: + // 'avc' missed in 'intel_sub_group_avc' prefix. + add("intel_sub_group_ime_ref_window_size", + OpSubgroupAvcImeRefWindowSizeINTEL); + +#define _SPIRV_OP(x, y) add("intel_sub_group_avc_" #x, OpSubgroupAvc##y##INTEL); + // Initialization phase functions + _SPIRV_OP(ime_initialize, ImeInitialize) + _SPIRV_OP(fme_initialize, FmeInitialize) + _SPIRV_OP(bme_initialize, BmeInitialize) + _SPIRV_OP(sic_initialize, SicInitialize) + + // Result and payload types conversion functions + _SPIRV_OP(mce_convert_to_ime_payload, MceConvertToImePayload) + _SPIRV_OP(mce_convert_to_ime_result, MceConvertToImeResult) + _SPIRV_OP(mce_convert_to_ref_payload, MceConvertToRefPayload) + _SPIRV_OP(mce_convert_to_ref_result, MceConvertToRefResult) + _SPIRV_OP(mce_convert_to_sic_payload, MceConvertToSicPayload) + _SPIRV_OP(mce_convert_to_sic_result, MceConvertToSicResult) + _SPIRV_OP(ime_convert_to_mce_payload, ImeConvertToMcePayload) + _SPIRV_OP(ime_convert_to_mce_result, ImeConvertToMceResult) + _SPIRV_OP(ref_convert_to_mce_payload, RefConvertToMcePayload) + _SPIRV_OP(ref_convert_to_mce_result, RefConvertToMceResult) + _SPIRV_OP(sic_convert_to_mce_payload, SicConvertToMcePayload) + _SPIRV_OP(sic_convert_to_mce_result, SicConvertToMceResult) +#undef _SPIRV_OP + +// MCE instructions +#define _SPIRV_OP(x, y) \ + add("intel_sub_group_avc_mce_" #x, OpSubgroupAvcMce##y##INTEL); + 
_SPIRV_OP(get_default_inter_base_multi_reference_penalty, + GetDefaultInterBaseMultiReferencePenalty) + _SPIRV_OP(set_inter_base_multi_reference_penalty, + SetInterBaseMultiReferencePenalty) + _SPIRV_OP(get_default_inter_shape_penalty, GetDefaultInterShapePenalty) + _SPIRV_OP(set_inter_shape_penalty, SetInterShapePenalty) + _SPIRV_OP(get_default_inter_direction_penalty, + GetDefaultInterDirectionPenalty) + _SPIRV_OP(set_inter_direction_penalty, SetInterDirectionPenalty) + _SPIRV_OP(get_default_intra_luma_shape_penalty, + GetDefaultIntraLumaShapePenalty) + _SPIRV_OP(get_default_inter_motion_vector_cost_table, + GetDefaultInterMotionVectorCostTable) + _SPIRV_OP(get_default_high_penalty_cost_table, GetDefaultHighPenaltyCostTable) + _SPIRV_OP(get_default_medium_penalty_cost_table, + GetDefaultMediumPenaltyCostTable) + _SPIRV_OP(get_default_low_penalty_cost_table, GetDefaultLowPenaltyCostTable) + _SPIRV_OP(set_motion_vector_cost_function, SetMotionVectorCostFunction) + _SPIRV_OP(get_default_intra_luma_mode_penalty, GetDefaultIntraLumaModePenalty) + _SPIRV_OP(get_default_non_dc_luma_intra_penalty, + GetDefaultNonDcLumaIntraPenalty) + _SPIRV_OP(get_default_intra_chroma_mode_base_penalty, + GetDefaultIntraChromaModeBasePenalty) + _SPIRV_OP(set_ac_only_haar, SetAcOnlyHaar) + _SPIRV_OP(set_source_interlaced_field_polarity, + SetSourceInterlacedFieldPolarity) + _SPIRV_OP(set_single_reference_interlaced_field_polarity, + SetSingleReferenceInterlacedFieldPolarity) + _SPIRV_OP(set_dual_reference_interlaced_field_polarities, + SetDualReferenceInterlacedFieldPolarities) + _SPIRV_OP(get_motion_vectors, GetMotionVectors) + _SPIRV_OP(get_inter_distortions, GetInterDistortions) + _SPIRV_OP(get_best_inter_distortion, GetBestInterDistortions) + _SPIRV_OP(get_inter_major_shape, GetInterMajorShape) + _SPIRV_OP(get_inter_minor_shapes, GetInterMinorShape) + _SPIRV_OP(get_inter_directions, GetInterDirections) + _SPIRV_OP(get_inter_motion_vector_count, GetInterMotionVectorCount) + 
_SPIRV_OP(get_inter_reference_ids, GetInterReferenceIds) + _SPIRV_OP(get_inter_reference_interlaced_field_polarities, + GetInterReferenceInterlacedFieldPolarities) +#undef _SPIRV_OP + +// IME instructions +#define _SPIRV_OP(x, y) \ + add("intel_sub_group_avc_ime_" #x, OpSubgroupAvcIme##y##INTEL); + _SPIRV_OP(set_single_reference, SetSingleReference) + _SPIRV_OP(set_dual_reference, SetDualReference) + _SPIRV_OP(ref_window_size, RefWindowSize) + _SPIRV_OP(adjust_ref_offset, AdjustRefOffset) + _SPIRV_OP(set_max_motion_vector_count, SetMaxMotionVectorCount) + _SPIRV_OP(set_unidirectional_mix_disable, SetUnidirectionalMixDisable) + _SPIRV_OP(set_early_search_termination_threshold, + SetEarlySearchTerminationThreshold) + _SPIRV_OP(set_weighted_sad, SetWeightedSad) + _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference) + _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference) + _SPIRV_OP(evaluate_with_single_reference_streamin, + EvaluateWithSingleReferenceStreamin) + _SPIRV_OP(evaluate_with_dual_reference_streamin, + EvaluateWithDualReferenceStreamin) + _SPIRV_OP(evaluate_with_single_reference_streamout, + EvaluateWithSingleReferenceStreamout) + _SPIRV_OP(evaluate_with_dual_reference_streamout, + EvaluateWithDualReferenceStreamout) + _SPIRV_OP(evaluate_with_single_reference_streaminout, + EvaluateWithSingleReferenceStreaminout) + _SPIRV_OP(evaluate_with_dual_reference_streaminout, + EvaluateWithDualReferenceStreaminout) + _SPIRV_OP(get_single_reference_streamin, GetSingleReferenceStreamin) + _SPIRV_OP(get_dual_reference_streamin, GetDualReferenceStreamin) + _SPIRV_OP(strip_single_reference_streamout, StripSingleReferenceStreamout) + _SPIRV_OP(strip_dual_reference_streamout, StripDualReferenceStreamout) + _SPIRV_OP(get_border_reached, GetBorderReached) + _SPIRV_OP(get_truncated_search_indication, GetTruncatedSearchIndication) + _SPIRV_OP(get_unidirectional_early_search_termination, + GetUnidirectionalEarlySearchTermination) + 
_SPIRV_OP(get_weighting_pattern_minimum_motion_vector, + GetWeightingPatternMinimumMotionVector) + _SPIRV_OP(get_weighting_pattern_minimum_distortion, + GetWeightingPatternMinimumDistortion) +#undef _SPIRV_OP + +#define _SPIRV_OP(x, y) \ + add("intel_sub_group_avc_ime_get_streamout_major_shape_" #x, \ + OpSubgroupAvcImeGetStreamout##y##INTEL); + _SPIRV_OP(motion_vectors_single_reference, + SingleReferenceMajorShapeMotionVectors) + _SPIRV_OP(distortions_single_reference, SingleReferenceMajorShapeDistortions) + _SPIRV_OP(reference_ids_single_reference, + SingleReferenceMajorShapeReferenceIds) + _SPIRV_OP(motion_vectors_dual_reference, DualReferenceMajorShapeMotionVectors) + _SPIRV_OP(distortions_dual_reference, DualReferenceMajorShapeDistortions) + _SPIRV_OP(reference_ids_dual_reference, DualReferenceMajorShapeReferenceIds) +#undef _SPIRV_OP + +// REF instructions +#define _SPIRV_OP(x, y) \ + add("intel_sub_group_avc_ref_" #x, OpSubgroupAvcRef##y##INTEL); + _SPIRV_OP(set_bidirectional_mix_disable, SetBidirectionalMixDisable) + _SPIRV_OP(set_bilinear_filter_enable, SetBilinearFilterEnable) + _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference) + _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference) + _SPIRV_OP(evaluate_with_multi_reference, EvaluateWithMultiReference) + _SPIRV_OP(evaluate_with_multi_reference_interlaced, + EvaluateWithMultiReferenceInterlaced) +#undef _SPIRV_OP + +// SIC instructions +#define _SPIRV_OP(x, y) \ + add("intel_sub_group_avc_sic_" #x, OpSubgroupAvcSic##y##INTEL); + _SPIRV_OP(configure_skc, ConfigureSkc) + _SPIRV_OP(configure_ipe_luma, ConfigureIpeLuma) + _SPIRV_OP(configure_ipe_luma_chroma, ConfigureIpeLumaChroma) + _SPIRV_OP(get_motion_vector_mask, GetMotionVectorMask) + _SPIRV_OP(set_intra_luma_shape_penalty, SetIntraLumaShapePenalty) + _SPIRV_OP(set_intra_luma_mode_cost_function, SetIntraLumaModeCostFunction) + _SPIRV_OP(set_intra_chroma_mode_cost_function, SetIntraChromaModeCostFunction) + 
_SPIRV_OP(set_skc_bilinear_filter_enable, SetBilinearFilterEnable) + _SPIRV_OP(set_skc_forward_transform_enable, SetSkcForwardTransformEnable) + _SPIRV_OP(set_block_based_raw_skip_sad, SetBlockBasedRawSkipSad) + _SPIRV_OP(evaluate_ipe, EvaluateIpe) + _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference) + _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference) + _SPIRV_OP(evaluate_with_multi_reference, EvaluateWithMultiReference) + _SPIRV_OP(evaluate_with_multi_reference_interlaced, + EvaluateWithMultiReferenceInterlaced) + _SPIRV_OP(get_ipe_luma_shape, GetIpeLumaShape) + _SPIRV_OP(get_best_ipe_luma_distortion, GetBestIpeLumaDistortion) + _SPIRV_OP(get_best_ipe_chroma_distortion, GetBestIpeChromaDistortion) + _SPIRV_OP(get_packed_ipe_luma_modes, GetPackedIpeLumaModes) + _SPIRV_OP(get_ipe_chroma_mode, GetIpeChromaMode) + _SPIRV_OP(get_packed_skc_luma_count_threshold, GetPackedSkcLumaCountThreshold) + _SPIRV_OP(get_packed_skc_luma_sum_threshold, GetPackedSkcLumaSumThreshold) + _SPIRV_OP(get_inter_raw_sads, GetInterRawSads) +#undef _SPIRV_OP +} + +template <> void SPIRVMap::init() { + add("opencl.event_t", OpTypeEvent); + add("opencl.pipe_t", OpTypePipe); + add("opencl.clk_event_t", OpTypeDeviceEvent); + add("opencl.reserve_id_t", OpTypeReserveId); + add("opencl.queue_t", OpTypeQueue); + add("opencl.sampler_t", OpTypeSampler); +} + +template <> void LLVMSPIRVAtomicRmwOpCodeMap::init() { + add(llvm::AtomicRMWInst::Xchg, OpAtomicExchange); + add(llvm::AtomicRMWInst::Add, OpAtomicIAdd); + add(llvm::AtomicRMWInst::Sub, OpAtomicISub); + add(llvm::AtomicRMWInst::And, OpAtomicAnd); + add(llvm::AtomicRMWInst::Or, OpAtomicOr); + add(llvm::AtomicRMWInst::Xor, OpAtomicXor); + add(llvm::AtomicRMWInst::Max, OpAtomicSMax); + add(llvm::AtomicRMWInst::Min, OpAtomicSMin); + add(llvm::AtomicRMWInst::UMax, OpAtomicUMax); + add(llvm::AtomicRMWInst::UMin, OpAtomicUMin); +} + +} // namespace SPIRV + 
/////////////////////////////////////////////////////////////////////////////// // // Functions for getting builtin call info // /////////////////////////////////////////////////////////////////////////////// + +namespace OCLUtil { + AtomicWorkItemFenceLiterals getAtomicWorkItemFenceLiterals(CallInst *CI) { return std::make_tuple(getArgAsInt(CI, 0), static_cast(getArgAsInt(CI, 1)), diff --git a/llvm-spirv/lib/SPIRV/OCLUtil.h b/llvm-spirv/lib/SPIRV/OCLUtil.h index d91316e0a310f..3d4ae9b9ae2e5 100644 --- a/llvm-spirv/lib/SPIRV/OCLUtil.h +++ b/llvm-spirv/lib/SPIRV/OCLUtil.h @@ -444,52 +444,8 @@ std::string getIntelSubgroupBlockDataPostfix(unsigned ElementBitSize, unsigned VectorNumElements); } // namespace OCLUtil -/////////////////////////////////////////////////////////////////////////////// -// -// Map definitions -// -/////////////////////////////////////////////////////////////////////////////// - using namespace OCLUtil; namespace SPIRV { -template <> inline void SPIRVMap::init() { - add(OCLMF_Local, MemorySemanticsWorkgroupMemoryMask); - add(OCLMF_Global, MemorySemanticsCrossWorkgroupMemoryMask); - add(OCLMF_Image, MemorySemanticsImageMemoryMask); -} - -template <> -inline void SPIRVMap::init() { - add(OCLMFEx_Local, MemorySemanticsWorkgroupMemoryMask); - add(OCLMFEx_Global, MemorySemanticsCrossWorkgroupMemoryMask); - add(OCLMFEx_Local_Global, MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsCrossWorkgroupMemoryMask); - add(OCLMFEx_Image, MemorySemanticsImageMemoryMask); - add(OCLMFEx_Image_Local, - MemorySemanticsWorkgroupMemoryMask | MemorySemanticsImageMemoryMask); - add(OCLMFEx_Image_Global, - MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsImageMemoryMask); - add(OCLMFEx_Image_Local_Global, MemorySemanticsWorkgroupMemoryMask | - MemorySemanticsCrossWorkgroupMemoryMask | - MemorySemanticsImageMemoryMask); -} - -template <> -inline void SPIRVMap::init() { - add(OCLMO_relaxed, MemorySemanticsMaskNone); - add(OCLMO_acquire, 
MemorySemanticsAcquireMask); - add(OCLMO_release, MemorySemanticsReleaseMask); - add(OCLMO_acq_rel, MemorySemanticsAcquireReleaseMask); - add(OCLMO_seq_cst, MemorySemanticsSequentiallyConsistentMask); -} - -template <> inline void SPIRVMap::init() { - add(OCLMS_work_item, ScopeInvocation); - add(OCLMS_work_group, ScopeWorkgroup); - add(OCLMS_device, ScopeDevice); - add(OCLMS_all_svm_devices, ScopeCrossDevice); - add(OCLMS_sub_group, ScopeSubgroup); -} template Instruction * @@ -623,492 +579,11 @@ Value *transSPIRVMemorySemanticsIntoOCLMemoryOrder(Value *MemorySemantics, Value *transSPIRVMemorySemanticsIntoOCLMemFenceFlags(Value *MemorySemantics, Instruction *InsertBefore); -template <> inline void SPIRVMap::init() { - add("reduce", GroupOperationReduce); - add("scan_inclusive", GroupOperationInclusiveScan); - add("scan_exclusive", GroupOperationExclusiveScan); - add("ballot_bit_count", GroupOperationReduce); - add("ballot_inclusive_scan", GroupOperationInclusiveScan); - add("ballot_exclusive_scan", GroupOperationExclusiveScan); - add("non_uniform_reduce", GroupOperationReduce); - add("non_uniform_scan_inclusive", GroupOperationInclusiveScan); - add("non_uniform_scan_exclusive", GroupOperationExclusiveScan); - add("non_uniform_reduce_logical", GroupOperationReduce); - add("non_uniform_scan_inclusive_logical", GroupOperationInclusiveScan); - add("non_uniform_scan_exclusive_logical", GroupOperationExclusiveScan); - add("clustered_reduce", GroupOperationClusteredReduce); -} - -template <> inline void SPIRVMap::init() { - add("rte", FPRoundingModeRTE); - add("rtz", FPRoundingModeRTZ); - add("rtp", FPRoundingModeRTP); - add("rtn", FPRoundingModeRTN); -} - -template <> inline void SPIRVMap::init() { -#define _SPIRV_OP(x) add(OclExt::x, #x); - _SPIRV_OP(cl_images) - _SPIRV_OP(cl_doubles) - _SPIRV_OP(cl_khr_int64_base_atomics) - _SPIRV_OP(cl_khr_int64_extended_atomics) - _SPIRV_OP(cl_khr_fp16) - _SPIRV_OP(cl_khr_gl_sharing) - _SPIRV_OP(cl_khr_gl_event) - 
_SPIRV_OP(cl_khr_d3d10_sharing) - _SPIRV_OP(cl_khr_media_sharing) - _SPIRV_OP(cl_khr_d3d11_sharing) - _SPIRV_OP(cl_khr_global_int32_base_atomics) - _SPIRV_OP(cl_khr_global_int32_extended_atomics) - _SPIRV_OP(cl_khr_local_int32_base_atomics) - _SPIRV_OP(cl_khr_local_int32_extended_atomics) - _SPIRV_OP(cl_khr_byte_addressable_store) - _SPIRV_OP(cl_khr_3d_image_writes) - _SPIRV_OP(cl_khr_gl_msaa_sharing) - _SPIRV_OP(cl_khr_depth_images) - _SPIRV_OP(cl_khr_gl_depth_images) - _SPIRV_OP(cl_khr_subgroups) - _SPIRV_OP(cl_khr_mipmap_image) - _SPIRV_OP(cl_khr_mipmap_image_writes) - _SPIRV_OP(cl_khr_egl_event) - _SPIRV_OP(cl_khr_srgb_image_writes) -#undef _SPIRV_OP -} - -template <> inline void SPIRVMap::init() { - add(OclExt::cl_images, CapabilityImageBasic); - add(OclExt::cl_doubles, CapabilityFloat64); - add(OclExt::cl_khr_int64_base_atomics, CapabilityInt64Atomics); - add(OclExt::cl_khr_int64_extended_atomics, CapabilityInt64Atomics); - add(OclExt::cl_khr_fp16, CapabilityFloat16); - add(OclExt::cl_khr_subgroups, CapabilityGroups); - add(OclExt::cl_khr_mipmap_image, CapabilityImageMipmap); - add(OclExt::cl_khr_mipmap_image_writes, CapabilityImageMipmap); -} - -/// Map OpenCL work functions to SPIR-V builtin variables. 
-template <> -inline void SPIRVMap::init() { - add("get_work_dim", BuiltInWorkDim); - add("get_global_size", BuiltInGlobalSize); - add("get_global_id", BuiltInGlobalInvocationId); - add("get_global_offset", BuiltInGlobalOffset); - add("get_local_size", BuiltInWorkgroupSize); - add("get_enqueued_local_size", BuiltInEnqueuedWorkgroupSize); - add("get_local_id", BuiltInLocalInvocationId); - add("get_num_groups", BuiltInNumWorkgroups); - add("get_group_id", BuiltInWorkgroupId); - add("get_global_linear_id", BuiltInGlobalLinearId); - add("get_local_linear_id", BuiltInLocalInvocationIndex); - // cl_khr_subgroups - add("get_sub_group_size", BuiltInSubgroupSize); - add("get_max_sub_group_size", BuiltInSubgroupMaxSize); - add("get_num_sub_groups", BuiltInNumSubgroups); - add("get_enqueued_num_sub_groups", BuiltInNumEnqueuedSubgroups); - add("get_sub_group_id", BuiltInSubgroupId); - add("get_sub_group_local_id", BuiltInSubgroupLocalInvocationId); - // cl_khr_subgroup_ballot - add("get_sub_group_eq_mask", BuiltInSubgroupEqMask); - add("get_sub_group_ge_mask", BuiltInSubgroupGeMask); - add("get_sub_group_gt_mask", BuiltInSubgroupGtMask); - add("get_sub_group_le_mask", BuiltInSubgroupLeMask); - add("get_sub_group_lt_mask", BuiltInSubgroupLtMask); -} - -// Maps uniqued OCL builtin function name to SPIR-V op code. -// A uniqued OCL builtin function name may be different from the real -// OCL builtin function name. e.g. instead of atomic_min, atomic_umin -// is used for atomic_min with unsigned integer parameter. -// work_group_ and sub_group_ functions are unified as group_ functions -// except work_group_barrier. 
-class SPIRVInstruction; -template <> inline void SPIRVMap::init() { -#define _SPIRV_OP(x, y) add("atom_" #x, OpAtomic##y); - // cl_khr_int64_base_atomics builtins - _SPIRV_OP(add, IAdd) - _SPIRV_OP(sub, ISub) - _SPIRV_OP(xchg, Exchange) - _SPIRV_OP(dec, IDecrement) - _SPIRV_OP(inc, IIncrement) - _SPIRV_OP(cmpxchg, CompareExchange) - // cl_khr_int64_extended_atomics builtins - _SPIRV_OP(min, SMin) - _SPIRV_OP(max, SMax) - _SPIRV_OP(and, And) - _SPIRV_OP(or, Or) - _SPIRV_OP(xor, Xor) -#undef _SPIRV_OP -#define _SPIRV_OP(x, y) add("atomic_" #x, Op##y); - // CL 2.0 atomic builtins - _SPIRV_OP(flag_test_and_set_explicit, AtomicFlagTestAndSet) - _SPIRV_OP(flag_clear_explicit, AtomicFlagClear) - _SPIRV_OP(load_explicit, AtomicLoad) - _SPIRV_OP(store_explicit, AtomicStore) - _SPIRV_OP(exchange_explicit, AtomicExchange) - _SPIRV_OP(compare_exchange_strong_explicit, AtomicCompareExchange) - _SPIRV_OP(compare_exchange_weak_explicit, AtomicCompareExchangeWeak) - _SPIRV_OP(inc, AtomicIIncrement) - _SPIRV_OP(dec, AtomicIDecrement) - _SPIRV_OP(fetch_add_explicit, AtomicIAdd) - _SPIRV_OP(fetch_sub_explicit, AtomicISub) - _SPIRV_OP(fetch_umin_explicit, AtomicUMin) - _SPIRV_OP(fetch_umax_explicit, AtomicUMax) - _SPIRV_OP(fetch_min_explicit, AtomicSMin) - _SPIRV_OP(fetch_max_explicit, AtomicSMax) - _SPIRV_OP(fetch_and_explicit, AtomicAnd) - _SPIRV_OP(fetch_or_explicit, AtomicOr) - _SPIRV_OP(fetch_xor_explicit, AtomicXor) -#undef _SPIRV_OP -#define _SPIRV_OP(x, y) add(#x, Op##y); - _SPIRV_OP(dot, Dot) - _SPIRV_OP(async_work_group_copy, GroupAsyncCopy) - _SPIRV_OP(async_work_group_strided_copy, GroupAsyncCopy) - _SPIRV_OP(wait_group_events, GroupWaitEvents) - _SPIRV_OP(isequal, FOrdEqual) - _SPIRV_OP(isnotequal, FUnordNotEqual) - _SPIRV_OP(isgreater, FOrdGreaterThan) - _SPIRV_OP(isgreaterequal, FOrdGreaterThanEqual) - _SPIRV_OP(isless, FOrdLessThan) - _SPIRV_OP(islessequal, FOrdLessThanEqual) - _SPIRV_OP(islessgreater, LessOrGreater) - _SPIRV_OP(isordered, Ordered) - 
_SPIRV_OP(isunordered, Unordered) - _SPIRV_OP(isfinite, IsFinite) - _SPIRV_OP(isinf, IsInf) - _SPIRV_OP(isnan, IsNan) - _SPIRV_OP(isnormal, IsNormal) - _SPIRV_OP(signbit, SignBitSet) - _SPIRV_OP(any, Any) - _SPIRV_OP(all, All) - _SPIRV_OP(popcount, BitCount) - _SPIRV_OP(get_fence, GenericPtrMemSemantics) - // CL 2.0 kernel enqueue builtins - _SPIRV_OP(enqueue_marker, EnqueueMarker) - _SPIRV_OP(enqueue_kernel, EnqueueKernel) - _SPIRV_OP(get_kernel_sub_group_count_for_ndrange_impl, - GetKernelNDrangeSubGroupCount) - _SPIRV_OP(get_kernel_max_sub_group_size_for_ndrange_impl, - GetKernelNDrangeMaxSubGroupSize) - _SPIRV_OP(get_kernel_work_group_size_impl, GetKernelWorkGroupSize) - _SPIRV_OP(get_kernel_preferred_work_group_size_multiple_impl, - GetKernelPreferredWorkGroupSizeMultiple) - _SPIRV_OP(retain_event, RetainEvent) - _SPIRV_OP(release_event, ReleaseEvent) - _SPIRV_OP(create_user_event, CreateUserEvent) - _SPIRV_OP(is_valid_event, IsValidEvent) - _SPIRV_OP(set_user_event_status, SetUserEventStatus) - _SPIRV_OP(capture_event_profiling_info, CaptureEventProfilingInfo) - _SPIRV_OP(get_default_queue, GetDefaultQueue) - _SPIRV_OP(ndrange_1D, BuildNDRange) - _SPIRV_OP(ndrange_2D, BuildNDRange) - _SPIRV_OP(ndrange_3D, BuildNDRange) - // Generic Address Space Casts - _SPIRV_OP(to_global, GenericCastToPtrExplicit) - _SPIRV_OP(to_local, GenericCastToPtrExplicit) - _SPIRV_OP(to_private, GenericCastToPtrExplicit) - // CL 2.0 pipe builtins - _SPIRV_OP(read_pipe_2, ReadPipe) - _SPIRV_OP(write_pipe_2, WritePipe) - _SPIRV_OP(read_pipe_2_bl, ReadPipeBlockingINTEL) - _SPIRV_OP(write_pipe_2_bl, WritePipeBlockingINTEL) - _SPIRV_OP(read_pipe_4, ReservedReadPipe) - _SPIRV_OP(write_pipe_4, ReservedWritePipe) - _SPIRV_OP(reserve_read_pipe, ReserveReadPipePackets) - _SPIRV_OP(reserve_write_pipe, ReserveWritePipePackets) - _SPIRV_OP(commit_read_pipe, CommitReadPipe) - _SPIRV_OP(commit_write_pipe, CommitWritePipe) - _SPIRV_OP(is_valid_reserve_id, IsValidReserveId) - 
_SPIRV_OP(group_reserve_read_pipe, GroupReserveReadPipePackets) - _SPIRV_OP(group_reserve_write_pipe, GroupReserveWritePipePackets) - _SPIRV_OP(group_commit_read_pipe, GroupCommitReadPipe) - _SPIRV_OP(group_commit_write_pipe, GroupCommitWritePipe) - _SPIRV_OP(get_pipe_num_packets_ro, GetNumPipePackets) - _SPIRV_OP(get_pipe_num_packets_wo, GetNumPipePackets) - _SPIRV_OP(get_pipe_max_packets_ro, GetMaxPipePackets) - _SPIRV_OP(get_pipe_max_packets_wo, GetMaxPipePackets) - // CL 2.0 workgroup builtins - _SPIRV_OP(group_all, GroupAll) - _SPIRV_OP(group_any, GroupAny) - _SPIRV_OP(group_broadcast, GroupBroadcast) - _SPIRV_OP(group_iadd, GroupIAdd) - _SPIRV_OP(group_fadd, GroupFAdd) - _SPIRV_OP(group_fmin, GroupFMin) - _SPIRV_OP(group_umin, GroupUMin) - _SPIRV_OP(group_smin, GroupSMin) - _SPIRV_OP(group_fmax, GroupFMax) - _SPIRV_OP(group_umax, GroupUMax) - _SPIRV_OP(group_smax, GroupSMax) - // CL image builtins - _SPIRV_OP(SampledImage, SampledImage) - _SPIRV_OP(ImageSampleExplicitLod, ImageSampleExplicitLod) - _SPIRV_OP(read_image, ImageRead) - _SPIRV_OP(write_image, ImageWrite) - _SPIRV_OP(get_image_channel_data_type, ImageQueryFormat) - _SPIRV_OP(get_image_channel_order, ImageQueryOrder) - _SPIRV_OP(get_image_num_mip_levels, ImageQueryLevels) - _SPIRV_OP(get_image_num_samples, ImageQuerySamples) - // Intel Subgroups builtins - _SPIRV_OP(intel_sub_group_shuffle, SubgroupShuffleINTEL) - _SPIRV_OP(intel_sub_group_shuffle_down, SubgroupShuffleDownINTEL) - _SPIRV_OP(intel_sub_group_shuffle_up, SubgroupShuffleUpINTEL) - _SPIRV_OP(intel_sub_group_shuffle_xor, SubgroupShuffleXorINTEL) - // Intel media_block_io builtins - _SPIRV_OP(intel_sub_group_media_block_read, SubgroupImageMediaBlockReadINTEL) - _SPIRV_OP(intel_sub_group_media_block_write, - SubgroupImageMediaBlockWriteINTEL) - // cl_khr_subgroup_non_uniform_vote - _SPIRV_OP(group_elect, GroupNonUniformElect) - _SPIRV_OP(group_non_uniform_all, GroupNonUniformAll) - _SPIRV_OP(group_non_uniform_any, GroupNonUniformAny) - 
_SPIRV_OP(group_non_uniform_all_equal, GroupNonUniformAllEqual) - // cl_khr_subgroup_ballot - _SPIRV_OP(group_non_uniform_broadcast, GroupNonUniformBroadcast) - _SPIRV_OP(group_broadcast_first, GroupNonUniformBroadcastFirst) - _SPIRV_OP(group_ballot, GroupNonUniformBallot) - _SPIRV_OP(group_inverse_ballot, GroupNonUniformInverseBallot) - _SPIRV_OP(group_ballot_bit_extract, GroupNonUniformBallotBitExtract) - _SPIRV_OP(group_ballot_bit_count_iadd, GroupNonUniformBallotBitCount) - _SPIRV_OP(group_ballot_find_lsb, GroupNonUniformBallotFindLSB) - _SPIRV_OP(group_ballot_find_msb, GroupNonUniformBallotFindMSB) - // cl_khr_subgroup_non_uniform_arithmetic - _SPIRV_OP(group_non_uniform_iadd, GroupNonUniformIAdd) - _SPIRV_OP(group_non_uniform_fadd, GroupNonUniformFAdd) - _SPIRV_OP(group_non_uniform_imul, GroupNonUniformIMul) - _SPIRV_OP(group_non_uniform_fmul, GroupNonUniformFMul) - _SPIRV_OP(group_non_uniform_smin, GroupNonUniformSMin) - _SPIRV_OP(group_non_uniform_umin, GroupNonUniformUMin) - _SPIRV_OP(group_non_uniform_fmin, GroupNonUniformFMin) - _SPIRV_OP(group_non_uniform_smax, GroupNonUniformSMax) - _SPIRV_OP(group_non_uniform_umax, GroupNonUniformUMax) - _SPIRV_OP(group_non_uniform_fmax, GroupNonUniformFMax) - _SPIRV_OP(group_non_uniform_iand, GroupNonUniformBitwiseAnd) - _SPIRV_OP(group_non_uniform_ior, GroupNonUniformBitwiseOr) - _SPIRV_OP(group_non_uniform_ixor, GroupNonUniformBitwiseXor) - _SPIRV_OP(group_non_uniform_logical_iand, GroupNonUniformLogicalAnd) - _SPIRV_OP(group_non_uniform_logical_ior, GroupNonUniformLogicalOr) - _SPIRV_OP(group_non_uniform_logical_ixor, GroupNonUniformLogicalXor) - // cl_khr_subgroup_shuffle - _SPIRV_OP(group_shuffle, GroupNonUniformShuffle) - _SPIRV_OP(group_shuffle_xor, GroupNonUniformShuffleXor) - // cl_khr_subgroup_shuffle_relative - _SPIRV_OP(group_shuffle_up, GroupNonUniformShuffleUp) - _SPIRV_OP(group_shuffle_down, GroupNonUniformShuffleDown) -#undef _SPIRV_OP -} - -template <> inline void SPIRVMap::init() { -#define 
_SPIRV_OP(x, y) add(#x, Op##y); - _SPIRV_OP(add, AtomicIAdd) - _SPIRV_OP(sub, AtomicISub) - _SPIRV_OP(xchg, AtomicExchange) - _SPIRV_OP(cmpxchg, AtomicCompareExchange) - _SPIRV_OP(inc, AtomicIIncrement) - _SPIRV_OP(dec, AtomicIDecrement) - _SPIRV_OP(min, AtomicSMin) - _SPIRV_OP(max, AtomicSMax) - _SPIRV_OP(umin, AtomicUMin) - _SPIRV_OP(umax, AtomicUMax) - _SPIRV_OP(and, AtomicAnd) - _SPIRV_OP(or, AtomicOr) - _SPIRV_OP(xor, AtomicXor) -#undef _SPIRV_OP -} - -// SPV_INTEL_device_side_avc_motion_estimation extension builtins class SPIRVSubgroupsAVCIntelInst; -template <> -inline void SPIRVMap::init() { - // Here is a workaround for a bug in the specification: - // 'avc' missed in 'intel_sub_group_avc' prefix. - add("intel_sub_group_ime_ref_window_size", - OpSubgroupAvcImeRefWindowSizeINTEL); - -#define _SPIRV_OP(x, y) add("intel_sub_group_avc_" #x, OpSubgroupAvc##y##INTEL); - // Initialization phase functions - _SPIRV_OP(ime_initialize, ImeInitialize) - _SPIRV_OP(fme_initialize, FmeInitialize) - _SPIRV_OP(bme_initialize, BmeInitialize) - _SPIRV_OP(sic_initialize, SicInitialize) - - // Result and payload types conversion functions - _SPIRV_OP(mce_convert_to_ime_payload, MceConvertToImePayload) - _SPIRV_OP(mce_convert_to_ime_result, MceConvertToImeResult) - _SPIRV_OP(mce_convert_to_ref_payload, MceConvertToRefPayload) - _SPIRV_OP(mce_convert_to_ref_result, MceConvertToRefResult) - _SPIRV_OP(mce_convert_to_sic_payload, MceConvertToSicPayload) - _SPIRV_OP(mce_convert_to_sic_result, MceConvertToSicResult) - _SPIRV_OP(ime_convert_to_mce_payload, ImeConvertToMcePayload) - _SPIRV_OP(ime_convert_to_mce_result, ImeConvertToMceResult) - _SPIRV_OP(ref_convert_to_mce_payload, RefConvertToMcePayload) - _SPIRV_OP(ref_convert_to_mce_result, RefConvertToMceResult) - _SPIRV_OP(sic_convert_to_mce_payload, SicConvertToMcePayload) - _SPIRV_OP(sic_convert_to_mce_result, SicConvertToMceResult) -#undef _SPIRV_OP - -// MCE instructions -#define _SPIRV_OP(x, y) \ - 
add("intel_sub_group_avc_mce_" #x, OpSubgroupAvcMce##y##INTEL); - _SPIRV_OP(get_default_inter_base_multi_reference_penalty, - GetDefaultInterBaseMultiReferencePenalty) - _SPIRV_OP(set_inter_base_multi_reference_penalty, - SetInterBaseMultiReferencePenalty) - _SPIRV_OP(get_default_inter_shape_penalty, GetDefaultInterShapePenalty) - _SPIRV_OP(set_inter_shape_penalty, SetInterShapePenalty) - _SPIRV_OP(get_default_inter_direction_penalty, - GetDefaultInterDirectionPenalty) - _SPIRV_OP(set_inter_direction_penalty, SetInterDirectionPenalty) - _SPIRV_OP(get_default_intra_luma_shape_penalty, - GetDefaultIntraLumaShapePenalty) - _SPIRV_OP(get_default_inter_motion_vector_cost_table, - GetDefaultInterMotionVectorCostTable) - _SPIRV_OP(get_default_high_penalty_cost_table, GetDefaultHighPenaltyCostTable) - _SPIRV_OP(get_default_medium_penalty_cost_table, - GetDefaultMediumPenaltyCostTable) - _SPIRV_OP(get_default_low_penalty_cost_table, GetDefaultLowPenaltyCostTable) - _SPIRV_OP(set_motion_vector_cost_function, SetMotionVectorCostFunction) - _SPIRV_OP(get_default_intra_luma_mode_penalty, GetDefaultIntraLumaModePenalty) - _SPIRV_OP(get_default_non_dc_luma_intra_penalty, - GetDefaultNonDcLumaIntraPenalty) - _SPIRV_OP(get_default_intra_chroma_mode_base_penalty, - GetDefaultIntraChromaModeBasePenalty) - _SPIRV_OP(set_ac_only_haar, SetAcOnlyHaar) - _SPIRV_OP(set_source_interlaced_field_polarity, - SetSourceInterlacedFieldPolarity) - _SPIRV_OP(set_single_reference_interlaced_field_polarity, - SetSingleReferenceInterlacedFieldPolarity) - _SPIRV_OP(set_dual_reference_interlaced_field_polarities, - SetDualReferenceInterlacedFieldPolarities) - _SPIRV_OP(get_motion_vectors, GetMotionVectors) - _SPIRV_OP(get_inter_distortions, GetInterDistortions) - _SPIRV_OP(get_best_inter_distortion, GetBestInterDistortions) - _SPIRV_OP(get_inter_major_shape, GetInterMajorShape) - _SPIRV_OP(get_inter_minor_shapes, GetInterMinorShape) - _SPIRV_OP(get_inter_directions, GetInterDirections) - 
_SPIRV_OP(get_inter_motion_vector_count, GetInterMotionVectorCount) - _SPIRV_OP(get_inter_reference_ids, GetInterReferenceIds) - _SPIRV_OP(get_inter_reference_interlaced_field_polarities, - GetInterReferenceInterlacedFieldPolarities) -#undef _SPIRV_OP - -// IME instructions -#define _SPIRV_OP(x, y) \ - add("intel_sub_group_avc_ime_" #x, OpSubgroupAvcIme##y##INTEL); - _SPIRV_OP(set_single_reference, SetSingleReference) - _SPIRV_OP(set_dual_reference, SetDualReference) - _SPIRV_OP(ref_window_size, RefWindowSize) - _SPIRV_OP(adjust_ref_offset, AdjustRefOffset) - _SPIRV_OP(set_max_motion_vector_count, SetMaxMotionVectorCount) - _SPIRV_OP(set_unidirectional_mix_disable, SetUnidirectionalMixDisable) - _SPIRV_OP(set_early_search_termination_threshold, - SetEarlySearchTerminationThreshold) - _SPIRV_OP(set_weighted_sad, SetWeightedSad) - _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference) - _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference) - _SPIRV_OP(evaluate_with_single_reference_streamin, - EvaluateWithSingleReferenceStreamin) - _SPIRV_OP(evaluate_with_dual_reference_streamin, - EvaluateWithDualReferenceStreamin) - _SPIRV_OP(evaluate_with_single_reference_streamout, - EvaluateWithSingleReferenceStreamout) - _SPIRV_OP(evaluate_with_dual_reference_streamout, - EvaluateWithDualReferenceStreamout) - _SPIRV_OP(evaluate_with_single_reference_streaminout, - EvaluateWithSingleReferenceStreaminout) - _SPIRV_OP(evaluate_with_dual_reference_streaminout, - EvaluateWithDualReferenceStreaminout) - _SPIRV_OP(get_single_reference_streamin, GetSingleReferenceStreamin) - _SPIRV_OP(get_dual_reference_streamin, GetDualReferenceStreamin) - _SPIRV_OP(strip_single_reference_streamout, StripSingleReferenceStreamout) - _SPIRV_OP(strip_dual_reference_streamout, StripDualReferenceStreamout) - _SPIRV_OP(get_border_reached, GetBorderReached) - _SPIRV_OP(get_truncated_search_indication, GetTruncatedSearchIndication) - 
_SPIRV_OP(get_unidirectional_early_search_termination, - GetUnidirectionalEarlySearchTermination) - _SPIRV_OP(get_weighting_pattern_minimum_motion_vector, - GetWeightingPatternMinimumMotionVector) - _SPIRV_OP(get_weighting_pattern_minimum_distortion, - GetWeightingPatternMinimumDistortion) -#undef _SPIRV_OP - -#define _SPIRV_OP(x, y) \ - add("intel_sub_group_avc_ime_get_streamout_major_shape_" #x, \ - OpSubgroupAvcImeGetStreamout##y##INTEL); - _SPIRV_OP(motion_vectors_single_reference, - SingleReferenceMajorShapeMotionVectors) - _SPIRV_OP(distortions_single_reference, SingleReferenceMajorShapeDistortions) - _SPIRV_OP(reference_ids_single_reference, - SingleReferenceMajorShapeReferenceIds) - _SPIRV_OP(motion_vectors_dual_reference, DualReferenceMajorShapeMotionVectors) - _SPIRV_OP(distortions_dual_reference, DualReferenceMajorShapeDistortions) - _SPIRV_OP(reference_ids_dual_reference, DualReferenceMajorShapeReferenceIds) -#undef _SPIRV_OP - -// REF instructions -#define _SPIRV_OP(x, y) \ - add("intel_sub_group_avc_ref_" #x, OpSubgroupAvcRef##y##INTEL); - _SPIRV_OP(set_bidirectional_mix_disable, SetBidirectionalMixDisable) - _SPIRV_OP(set_bilinear_filter_enable, SetBilinearFilterEnable) - _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference) - _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference) - _SPIRV_OP(evaluate_with_multi_reference, EvaluateWithMultiReference) - _SPIRV_OP(evaluate_with_multi_reference_interlaced, - EvaluateWithMultiReferenceInterlaced) -#undef _SPIRV_OP - -// SIC instructions -#define _SPIRV_OP(x, y) \ - add("intel_sub_group_avc_sic_" #x, OpSubgroupAvcSic##y##INTEL); - _SPIRV_OP(configure_skc, ConfigureSkc) - _SPIRV_OP(configure_ipe_luma, ConfigureIpeLuma) - _SPIRV_OP(configure_ipe_luma_chroma, ConfigureIpeLumaChroma) - _SPIRV_OP(get_motion_vector_mask, GetMotionVectorMask) - _SPIRV_OP(set_intra_luma_shape_penalty, SetIntraLumaShapePenalty) - _SPIRV_OP(set_intra_luma_mode_cost_function, 
SetIntraLumaModeCostFunction) - _SPIRV_OP(set_intra_chroma_mode_cost_function, SetIntraChromaModeCostFunction) - _SPIRV_OP(set_skc_bilinear_filter_enable, SetBilinearFilterEnable) - _SPIRV_OP(set_skc_forward_transform_enable, SetSkcForwardTransformEnable) - _SPIRV_OP(set_block_based_raw_skip_sad, SetBlockBasedRawSkipSad) - _SPIRV_OP(evaluate_ipe, EvaluateIpe) - _SPIRV_OP(evaluate_with_single_reference, EvaluateWithSingleReference) - _SPIRV_OP(evaluate_with_dual_reference, EvaluateWithDualReference) - _SPIRV_OP(evaluate_with_multi_reference, EvaluateWithMultiReference) - _SPIRV_OP(evaluate_with_multi_reference_interlaced, - EvaluateWithMultiReferenceInterlaced) - _SPIRV_OP(get_ipe_luma_shape, GetIpeLumaShape) - _SPIRV_OP(get_best_ipe_luma_distortion, GetBestIpeLumaDistortion) - _SPIRV_OP(get_best_ipe_chroma_distortion, GetBestIpeChromaDistortion) - _SPIRV_OP(get_packed_ipe_luma_modes, GetPackedIpeLumaModes) - _SPIRV_OP(get_ipe_chroma_mode, GetIpeChromaMode) - _SPIRV_OP(get_packed_skc_luma_count_threshold, GetPackedSkcLumaCountThreshold) - _SPIRV_OP(get_packed_skc_luma_sum_threshold, GetPackedSkcLumaSumThreshold) - _SPIRV_OP(get_inter_raw_sads, GetInterRawSads) -#undef _SPIRV_OP -} typedef SPIRVMap OCLSPIRVSubgroupAVCIntelBuiltinMap; -template <> inline void SPIRVMap::init() { - add("opencl.event_t", OpTypeEvent); - add("opencl.pipe_t", OpTypePipe); - add("opencl.clk_event_t", OpTypeDeviceEvent); - add("opencl.reserve_id_t", OpTypeReserveId); - add("opencl.queue_t", OpTypeQueue); - add("opencl.sampler_t", OpTypeSampler); -} - typedef SPIRVMap LLVMSPIRVAtomicRmwOpCodeMap; -template <> inline void LLVMSPIRVAtomicRmwOpCodeMap::init() { - add(llvm::AtomicRMWInst::Xchg, OpAtomicExchange); - add(llvm::AtomicRMWInst::Add, OpAtomicIAdd); - add(llvm::AtomicRMWInst::Sub, OpAtomicISub); - add(llvm::AtomicRMWInst::And, OpAtomicAnd); - add(llvm::AtomicRMWInst::Or, OpAtomicOr); - add(llvm::AtomicRMWInst::Xor, OpAtomicXor); - add(llvm::AtomicRMWInst::Max, OpAtomicSMax); - 
add(llvm::AtomicRMWInst::Min, OpAtomicSMin); - add(llvm::AtomicRMWInst::UMax, OpAtomicUMax); - add(llvm::AtomicRMWInst::UMin, OpAtomicUMin); -} } // namespace SPIRV From 68d4c4cc98c4ec50197b5548f943c0bc20a3b9f3 Mon Sep 17 00:00:00 2001 From: nrudenko Date: Thu, 9 Jul 2020 14:51:47 +0300 Subject: [PATCH 766/771] Move SPIRVAllowUnknownIntrinsics option to TranslatorOpts class --- llvm-spirv/include/LLVMSPIRVOpts.h | 13 +++++++++++++ llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 11 +++-------- llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h | 4 ++++ llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp | 15 +++++++++++++++ 4 files changed, 35 insertions(+), 8 deletions(-) diff --git a/llvm-spirv/include/LLVMSPIRVOpts.h b/llvm-spirv/include/LLVMSPIRVOpts.h index 62831d0c4b6b7..6abbf15eb9bbe 100644 --- a/llvm-spirv/include/LLVMSPIRVOpts.h +++ b/llvm-spirv/include/LLVMSPIRVOpts.h @@ -137,6 +137,15 @@ class TranslatorOpts { FPContractMode getFPContractMode() const { return FPCMode; } + bool isSPIRVAllowUnknownIntrinsicsEnabled() const noexcept { + return SPIRVAllowUnknownIntrinsics; + } + + void + setSPIRVAllowUnknownIntrinsicsEnabled(bool AllowUnknownIntrinsics) noexcept { + SPIRVAllowUnknownIntrinsics = AllowUnknownIntrinsics; + } + private: // Common translation options VersionNumber MaxVersion = VersionNumber::MaximumVersion; @@ -159,6 +168,10 @@ class TranslatorOpts { // - FPContractMode::Fast allows *all* operations to be contracted // for all entry points FPContractMode FPCMode = FPContractMode::On; + + // Unknown LLVM intrinsics will be translated as external function calls in + // SPIR-V + bool SPIRVAllowUnknownIntrinsics = false; }; } // namespace SPIRV diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 1161ad5c97dc0..13abcabbc1fa0 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -92,11 +92,6 @@ using namespace OCLUtil; namespace SPIRV { -cl::opt SPIRVAllowUnknownIntrinsics( - 
"spirv-allow-unknown-intrinsics", cl::init(false), - cl::desc("Unknown LLVM intrinsics will be translated as external function " - "calls in SPIR-V")); - static void foreachKernelArgMD( MDNode *MD, SPIRVFunction *BF, std::function @@ -509,8 +504,8 @@ SPIRVFunction *LLVMToSPIRV::transFunctionDecl(Function *F) { if (auto BF = getTranslatedValue(F)) return static_cast(BF); - if (F->isIntrinsic() && - (!SPIRVAllowUnknownIntrinsics || isKnownIntrinsic(F->getIntrinsicID()))) { + if (F->isIntrinsic() && (!BM->isSPIRVAllowUnknownIntrinsicsEnabled() || + isKnownIntrinsic(F->getIntrinsicID()))) { // We should not translate LLVM intrinsics as a function assert(none_of(F->user_begin(), F->user_end(), [this](User *U) { return getTranslatedValue(U); }) && @@ -2131,7 +2126,7 @@ SPIRVValue *LLVMToSPIRV::transIntrinsicInst(IntrinsicInst *II, // change is pending the trap/abort intrisinc implementation. return nullptr; default: - if (SPIRVAllowUnknownIntrinsics) + if (BM->isSPIRVAllowUnknownIntrinsicsEnabled()) return BM->addCallInst( transFunctionDecl(II->getCalledFunction()), transArguments(II, BB, diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h index a4ec3ef3b7140..24620bb97085a 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVModule.h @@ -476,6 +476,10 @@ class SPIRVModule { return TranslationOpts.getFPContractMode(); } + bool isSPIRVAllowUnknownIntrinsicsEnabled() const noexcept { + return TranslationOpts.isSPIRVAllowUnknownIntrinsicsEnabled(); + } + // I/O functions friend spv_ostream &operator<<(spv_ostream &O, SPIRVModule &M); friend std::istream &operator>>(std::istream &I, SPIRVModule &M); diff --git a/llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp b/llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp index ce4cd190dda3b..42be0f4f581f2 100644 --- a/llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp +++ b/llvm-spirv/tools/llvm-spirv/llvm-spirv.cpp @@ -178,6 +178,11 @@ static cl::opt FPCMode( 
SPIRV::FPContractMode::Fast, "fast", "allow all operations to be contracted for all entry points"))); +cl::opt SPIRVAllowUnknownIntrinsics( + "spirv-allow-unknown-intrinsics", cl::init(false), + cl::desc("Unknown LLVM intrinsics will be translated as external function " + "calls in SPIR-V")); + static std::string removeExt(const std::string &FileName) { size_t Pos = FileName.find_last_of("."); if (Pos != std::string::npos) @@ -553,6 +558,16 @@ int main(int Ac, char **Av) { return -1; } + if (SPIRVAllowUnknownIntrinsics.getNumOccurrences() != 0) { + if (IsReverse) { + errs() + << "Note: --spirv-allow-unknown-intrinsics option ignored as it only " + "affects translation from LLVM IR to SPIR-V"; + } else { + Opts.setSPIRVAllowUnknownIntrinsicsEnabled(SPIRVAllowUnknownIntrinsics); + } + } + #ifdef _SPIRV_SUPPORT_TEXT_FMT if (ToText && (ToBinary || IsReverse || IsRegularization)) { errs() << "Cannot use -to-text with -to-binary, -r, -s\n"; From d35a1add245c1f3e0ec99f57290d8dde2aef9426 Mon Sep 17 00:00:00 2001 From: nrudenko Date: Thu, 9 Jul 2020 15:28:41 +0300 Subject: [PATCH 767/771] Extend LLVMSPIRVOpts.h interface allowing pass options to spirv passes --- llvm-spirv/include/LLVMSPIRVLib.h | 5 +++++ llvm-spirv/lib/SPIRV/SPIRVWriterPass.cpp | 20 ++++++++++++++++---- llvm-spirv/lib/SPIRV/SPIRVWriterPass.h | 14 +++++++++++++- 3 files changed, 34 insertions(+), 5 deletions(-) diff --git a/llvm-spirv/include/LLVMSPIRVLib.h b/llvm-spirv/include/LLVMSPIRVLib.h index 3e8a9f0908828..2fe5b3e8720c3 100644 --- a/llvm-spirv/include/LLVMSPIRVLib.h +++ b/llvm-spirv/include/LLVMSPIRVLib.h @@ -215,6 +215,11 @@ ModulePass *createPreprocessMetadata(); /// ostream. ModulePass *createSPIRVWriterPass(std::ostream &Str); +/// Create and return a pass that writes the module to the specified +/// ostream. 
+ModulePass *createSPIRVWriterPass(std::ostream &Str, + const SPIRV::TranslatorOpts &Opts); + } // namespace llvm #endif // SPIRV_H diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriterPass.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriterPass.cpp index ffd0676045e41..4e211d0567d83 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriterPass.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriterPass.cpp @@ -21,23 +21,26 @@ using namespace llvm; PreservedAnalyses SPIRVWriterPass::run(Module &M) { // FIXME: at the moment LLVM/SPIR-V translation errors are ignored. std::string Err; - writeSpirv(&M, OS, Err); + writeSpirv(&M, Opts, OS, Err); return PreservedAnalyses::all(); } namespace { class WriteSPIRVPass : public ModulePass { std::ostream &OS; // std::ostream to print on + SPIRV::TranslatorOpts Opts; + public: static char ID; // Pass identification, replacement for typeid - explicit WriteSPIRVPass(std::ostream &O) : ModulePass(ID), OS(O) {} + WriteSPIRVPass(std::ostream &OS, const SPIRV::TranslatorOpts &Opts) + : ModulePass(ID), OS(OS), Opts(Opts) {} StringRef getPassName() const override { return "SPIRV Writer"; } bool runOnModule(Module &M) override { // FIXME: at the moment LLVM/SPIR-V translation errors are ignored. 
std::string Err; - writeSpirv(&M, OS, Err); + writeSpirv(&M, Opts, OS, Err); return false; } }; @@ -46,5 +49,14 @@ class WriteSPIRVPass : public ModulePass { char WriteSPIRVPass::ID = 0; ModulePass *llvm::createSPIRVWriterPass(std::ostream &Str) { - return new WriteSPIRVPass(Str); + SPIRV::TranslatorOpts DefaultOpts; + // To preserve old behavior of the translator, let's enable all extensions + // by default in this API + DefaultOpts.enableAllExtensions(); + return createSPIRVWriterPass(Str, DefaultOpts); +} + +ModulePass *llvm::createSPIRVWriterPass(std::ostream &Str, + const SPIRV::TranslatorOpts &Opts) { + return new WriteSPIRVPass(Str, Opts); } diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriterPass.h b/llvm-spirv/lib/SPIRV/SPIRVWriterPass.h index b8005486bbaad..fe80217230be8 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriterPass.h +++ b/llvm-spirv/lib/SPIRV/SPIRVWriterPass.h @@ -15,6 +15,7 @@ #ifndef SPIRV_SPIRVWRITERPASS_H #define SPIRV_SPIRVWRITERPASS_H +#include "LLVMSPIRVOpts.h" #include "llvm/ADT/StringRef.h" namespace llvm { @@ -27,16 +28,27 @@ class PreservedAnalyses; /// manager. ModulePass *createSPIRVWriterPass(std::ostream &Str); +/// \brief Create and return a pass that writes the module to the specified +/// ostream. Note that this pass is designed for use with the legacy pass +/// manager. +ModulePass *createSPIRVWriterPass(std::ostream &Str, + const SPIRV::TranslatorOpts &Opts); + /// \brief Pass for writing a module of IR out to a SPIRV file. /// /// Note that this is intended for use with the new pass manager. To construct /// a pass for the legacy pass manager, use the function above. class SPIRVWriterPass { std::ostream &OS; + SPIRV::TranslatorOpts Opts; public: /// \brief Construct a SPIRV writer pass around a particular output stream. 
- explicit SPIRVWriterPass(std::ostream &OS) : OS(OS) {} + explicit SPIRVWriterPass(std::ostream &OS) : OS(OS) { + Opts.enableAllExtensions(); + } + SPIRVWriterPass(std::ostream &OS, const SPIRV::TranslatorOpts &Opts) + : OS(OS), Opts(Opts) {} /// \brief Run the SPIRV writer pass, and output the module to the selected /// output stream. From a2d863f30ae60667b5a990f4c0ff859fe7e45ce3 Mon Sep 17 00:00:00 2001 From: Sven van Haastregt Date: Wed, 15 Jul 2020 10:25:56 +0100 Subject: [PATCH 768/771] Drop unused CLVer member Its use was removed in dff07df ("[SPIRV] Fix assertion due to duplicate operands in opencl version metadata. This happens when two SPIR modules are linked together. Remove check for OCL version in OCLTypeToSPIRV. Only check if language is C or C++.", 2015-12-16). --- llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp | 3 +-- llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp index 1c60fa8af22a7..bb1f47d9129c6 100644 --- a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp +++ b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.cpp @@ -58,8 +58,7 @@ namespace SPIRV { char OCLTypeToSPIRV::ID = 0; -OCLTypeToSPIRV::OCLTypeToSPIRV() - : ModulePass(ID), M(nullptr), Ctx(nullptr), CLVer(0) { +OCLTypeToSPIRV::OCLTypeToSPIRV() : ModulePass(ID), M(nullptr), Ctx(nullptr) { initializeOCLTypeToSPIRVPass(*PassRegistry::getPassRegistry()); } diff --git a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h index 30025cd2aba1f..00d3104d0e780 100644 --- a/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h +++ b/llvm-spirv/lib/SPIRV/OCLTypeToSPIRV.h @@ -71,7 +71,6 @@ class OCLTypeToSPIRV : public ModulePass { private: Module *M; LLVMContext *Ctx; - unsigned CLVer; std::map AdaptedTy; // Adapted types for values std::set WorkSet; // Functions to be adapted From 326f12dbb6f78b847073569e146bcd2c7ab081fa Mon Sep 17 00:00:00 2001 From: Dmitry Sidorov Date: Mon, 13 
Jul 2020 19:30:25 +0300 Subject: [PATCH 769/771] Fix type mapping for pointers with Device/HostOnlyINTEL storage classes a7b763b265d introduces a bug, when it could be possible to map a single pointer type in LLVM IR to two different pointer types in SPIR-V (when SPV_INTEL_usm_storage_classes extension is not allowed). This patch fixes this bug. Signed-off-by: Dmitry Sidorov --- llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 7 +++++-- llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h | 12 +++--------- llvm-spirv/test/transcoding/intel_usm_addrspaces.ll | 9 +++++---- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 13abcabbc1fa0..26c5ea77cc165 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -308,8 +308,11 @@ SPIRVType *LLVMToSPIRV::transType(Type *T) { // extension if (!BM->isAllowedToUseExtension( ExtensionID::SPV_INTEL_usm_storage_classes) && - ((AddrSpc == SPIRAS_GlobalDevice) || (AddrSpc == SPIRAS_GlobalHost))) - AddrSpc = SPIRAS_Global; + ((AddrSpc == SPIRAS_GlobalDevice) || (AddrSpc == SPIRAS_GlobalHost))) { + auto NewType = + PointerType::get(T->getPointerElementType(), SPIRAS_Global); + return mapType(T, transType(NewType)); + } if (ST && !ST->isSized()) { Op OpCode; StringRef STName = ST->getName(); diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h index 4ea503613854f..60a9bcc3de60e 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVInstruction.h @@ -538,15 +538,9 @@ class SPIRVStore : public SPIRVInstruction, public SPIRVMemoryAccess { SPIRVInstruction::validate(); if (getSrc()->isForward() || getDst()->isForward()) return; -#ifndef NDEBUG - if (getValueType(PtrId)->getPointerElementType() != getValueType(ValId)) { - assert(getValueType(PtrId) - ->getPointerElementType() - ->getPointerStorageClass() == - 
getValueType(ValId)->getPointerStorageClass() && - "Inconsistent operand types"); - } -#endif // NDEBUG + assert(getValueType(PtrId)->getPointerElementType() == + getValueType(ValId) && + "Inconsistent operand types"); } private: diff --git a/llvm-spirv/test/transcoding/intel_usm_addrspaces.ll b/llvm-spirv/test/transcoding/intel_usm_addrspaces.ll index 457222be9c372..98a40601e6e73 100644 --- a/llvm-spirv/test/transcoding/intel_usm_addrspaces.ll +++ b/llvm-spirv/test/transcoding/intel_usm_addrspaces.ll @@ -23,10 +23,11 @@ ; CHECK-SPIRV: Name [[HOST_ARG2:[0-9]+]] "arg_host.addr" ; CHECK-SPIRV-EXT: TypePointer [[DEVICE_TY:[0-9]+]] 5936 {{[0-9]+}} ; CHECK-SPIRV-EXT: TypePointer [[HOST_TY:[0-9]+]] 5937 {{[0-9]+}} -; CHECK-SPIRV-NO-EXT: TypePointer [[DEVICE_TY:[0-9]+]] 5 {{[0-9]+}} -; CHECK-SPIRV-NO-EXT: TypePointer [[HOST_TY:[0-9]+]] 5 {{[0-9]+}} -; CHECK-SPIRV: Load [[DEVICE_TY]] {{[0-9]+}} [[DEVICE]] {{[0-9]+}} {{[0-9]+}} -; CHECK-SPIRV: Load [[HOST_TY]] {{[0-9]+}} [[HOST]] {{[0-9]+}} {{[0-9]+}} +; CHECK-SPIRV-NO-EXT: TypePointer [[GLOB_TY:[0-9]+]] 5 {{[0-9]+}} +; CHECK-SPIRV-EXT: Load [[DEVICE_TY]] {{[0-9]+}} [[DEVICE]] {{[0-9]+}} {{[0-9]+}} +; CHECK-SPIRV-EXT: Load [[HOST_TY]] {{[0-9]+}} [[HOST]] {{[0-9]+}} {{[0-9]+}} +; CHECK-SPIRV-NO-EXT: Load [[GLOB_TY]] {{[0-9]+}} [[DEVICE]] {{[0-9]+}} {{[0-9]+}} +; CHECK-SPIRV-NO-EXT: Load [[GLOB_TY]] {{[0-9]+}} [[HOST]] {{[0-9]+}} {{[0-9]+}} ; ModuleID = 'intel_usm_addrspaces.cpp' source_filename = "intel_usm_addrspaces.cpp" From 6f47dbdddddf052dd4f320fc680e24ea5322f2d0 Mon Sep 17 00:00:00 2001 From: nrudenko Date: Thu, 9 Jul 2020 16:42:01 +0300 Subject: [PATCH 770/771] Add SIMTCallINTEL decoration of SPV_INTEL_vector_compute extension Extension is published at https://github.com/intel/llvm/pull/1612 --- llvm-spirv/lib/SPIRV/SPIRVReader.cpp | 4 +++ llvm-spirv/lib/SPIRV/SPIRVWriter.cpp | 8 +++++ llvm-spirv/lib/SPIRV/VectorComputeUtil.h | 1 + llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h | 1 + 
.../lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h | 1 + .../lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h | 1 + llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp | 1 + .../test/transcoding/decoration_simt_call.ll | 35 +++++++++++++++++++ 8 files changed, 52 insertions(+) create mode 100755 llvm-spirv/test/transcoding/decoration_simt_call.ll diff --git a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp index ca90e1bbcb98a..bd6317c9bdfb1 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVReader.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVReader.cpp @@ -3460,6 +3460,10 @@ bool SPIRVToLLVM::transVectorComputeMetadata(SPIRVFunction *BF) { return true; F->addFnAttr(kVCMetadata::VCFunction); + SPIRVWord SIMTMode = 0; + if (BF->hasDecorate(DecorationSIMTCallINTEL, 0, &SIMTMode)) + F->addFnAttr(kVCMetadata::VCSIMTCall, std::to_string(SIMTMode)); + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { auto ArgNo = I->getArgNo(); diff --git a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp index 26c5ea77cc165..347111bb34454 100644 --- a/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp +++ b/llvm-spirv/lib/SPIRV/SPIRVWriter.cpp @@ -585,6 +585,14 @@ void LLVMToSPIRV::transVectorComputeMetadata(Function *F) { else return; + if (Attrs.hasFnAttribute(kVCMetadata::VCSIMTCall)) { + SPIRVWord SIMTMode = 0; + Attrs.getAttribute(AttributeList::FunctionIndex, kVCMetadata::VCSIMTCall) + .getValueAsString() + .getAsInteger(0, SIMTMode); + BF->addDecorate(DecorationSIMTCallINTEL, SIMTMode); + } + for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I) { auto ArgNo = I->getArgNo(); diff --git a/llvm-spirv/lib/SPIRV/VectorComputeUtil.h b/llvm-spirv/lib/SPIRV/VectorComputeUtil.h index f215b2dc2df3e..772d682a5e42a 100755 --- a/llvm-spirv/lib/SPIRV/VectorComputeUtil.h +++ b/llvm-spirv/lib/SPIRV/VectorComputeUtil.h @@ -116,6 +116,7 @@ const static char VCSLMSize[] = "VCSLMSize"; const static char VCGlobalVariable[] = "VCGlobalVariable"; const static 
char VCVolatile[] = "VCVolatile"; const static char VCByteOffset[] = "VCByteOffset"; +const static char VCSIMTCall[] = "VCSIMTCall"; const static char VCArgumentKind[] = "VCArgumentKind"; const static char VCArgumentDesc[] = "VCArgumentDesc"; } // namespace kVCMetadata diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h index 57ce0d3eedc0c..078a6ec426376 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVEnum.h @@ -393,6 +393,7 @@ template <> inline void SPIRVMap::init() { {CapabilityVectorComputeINTEL}); ADD_VEC_INIT(DecorationFuncParamIOKind, {CapabilityVectorComputeINTEL}); ADD_VEC_INIT(DecorationStackCallINTEL, {CapabilityVectorComputeINTEL}); + ADD_VEC_INIT(DecorationSIMTCallINTEL, {CapabilityVectorComputeINTEL}); ADD_VEC_INIT(DecorationFuncParamKindINTEL, {CapabilityVectorComputeINTEL}); ADD_VEC_INIT(DecorationFuncParamDescINTEL, {CapabilityVectorComputeINTEL}); } diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h index a4d23c2d51aa0..2b96e6c298497 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVIsValidEnum.h @@ -431,6 +431,7 @@ inline bool isValid(spv::Decoration V) { case DecorationVectorComputeVariableINTEL: case DecorationGlobalVariableOffsetINTEL: case DecorationFuncParamIOKind: + case DecorationSIMTCallINTEL: return true; default: return false; diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h index 5cff7007899f2..5290bdb9bb5a0 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h +++ b/llvm-spirv/lib/SPIRV/libSPIRV/SPIRVNameMapEnum.h @@ -371,6 +371,7 @@ template <> inline void SPIRVMap::init() { add(DecorationVectorComputeVariableINTEL, "VectorComputeVariableINTEL"); add(DecorationGlobalVariableOffsetINTEL, "GlobalVariableOffsetINTEL"); add(DecorationFuncParamIOKind, 
"FuncParamIOKind"); + add(DecorationSIMTCallINTEL, "SIMTCallINTEL"); } SPIRV_DEF_NAMEMAP(Decoration, SPIRVDecorationNameMap) diff --git a/llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp b/llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp index c6992238452a2..862c45e4f4ae6 100644 --- a/llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp +++ b/llvm-spirv/lib/SPIRV/libSPIRV/spirv.hpp @@ -477,6 +477,7 @@ enum Decoration { DecorationRestrictPointerEXT = 5355, DecorationAliasedPointer = 5356, DecorationAliasedPointerEXT = 5356, + DecorationSIMTCallINTEL = 5599, DecorationFuncParamKindINTEL = 9624, DecorationFuncParamDescINTEL = 9625, DecorationReferencedIndirectlyINTEL = 5602, diff --git a/llvm-spirv/test/transcoding/decoration_simt_call.ll b/llvm-spirv/test/transcoding/decoration_simt_call.ll new file mode 100755 index 0000000000000..0bd687ea7897d --- /dev/null +++ b/llvm-spirv/test/transcoding/decoration_simt_call.ll @@ -0,0 +1,35 @@ +; RUN: llvm-as %s -o %t.bc +; RUN: llvm-spirv %t.bc -o %t.spv --spirv-ext=+SPV_INTEL_vector_compute --spirv-allow-unknown-intrinsics +; RUN: llvm-spirv %t.spv -o %t.spt --to-text +; RUN: llvm-spirv -r %t.spv -o %t.bc +; RUN: llvm-dis %t.bc -o %t.ll +; RUN: FileCheck %s --input-file %t.spt -check-prefix=SPV +; RUN: FileCheck %s --input-file %t.ll -check-prefix=LLVM + +; ModuleID = 'slm.bc' +source_filename = "slm.cpp" +target datalayout = "e-p:64:64-i64:64-n8:16:32" +target triple = "spir" + +; LLVM-DAG: @k_rte{{[^#]*}}#[[K_RTE:[0-9]+]] +; LLVM-DAG: attributes #[[K_RTE]]{{.*"VCSIMTCall"="5" }} +; SPV-DAG: EntryPoint 6 [[K_RTE:[0-9]+]] "k_rte" +; SPV-DAG: Decorate [[K_RTE]] SIMTCallINTEL 5 + +@in = internal global <256 x i8> undef, align 256 #0 +declare <256 x i8> @llvm.genx.vload(<256 x i8>* nonnull %aaa) + +; Function Attrs: noinline norecurse nounwind readnone +define dso_local dllexport spir_kernel void @k_rte(i32 %ibuf, i32 %obuf) local_unnamed_addr #1 { +entry: + %gload53 = tail call <256 x i8> @llvm.genx.vload(<256 x i8>* nonnull @in) + ret void +} + +attributes 
#1 = { noinline norecurse nounwind readnone "VCFunction" "VCSIMTCall"="5" "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} +!llvm.ident = !{!1} + +!0 = !{i32 1, !"wchar_size", i32 4} +!1 = !{!"clang version 8.0.1"} From 5b18de1a0eff9dcbfef9ce36a99e9991036e425d Mon Sep 17 00:00:00 2001 From: Vladimir Lazarev Date: Mon, 20 Jul 2020 11:03:13 +0300 Subject: [PATCH 771/771] [SYCL] Fix LIT test --- clang/test/CodeGenSYCL/noexcept.cpp | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/clang/test/CodeGenSYCL/noexcept.cpp b/clang/test/CodeGenSYCL/noexcept.cpp index c1299229ef92f..816f2c43ebfe2 100644 --- a/clang/test/CodeGenSYCL/noexcept.cpp +++ b/clang/test/CodeGenSYCL/noexcept.cpp @@ -18,15 +18,15 @@ void f3() {} void foo_noexcept() noexcept { // CHECK-DEVICE: call spir_func void @_Z2f1v() - // CHECK-HOST-LIN: invoke void @_Z2f1v() - // CHECK-HOST-WIN: invoke void @"?f1@@YAXXZ"() + // CHECK-HOST-LIN: call void @_Z2f1v() + // CHECK-HOST-WIN: call void @"?f1@@YAXXZ"() f1(); } void foo_throw() throw() { // CHECK-DEVICE: call spir_func void @_Z2f2v() - // CHECK-HOST-LIN: invoke void @_Z2f2v() - // CHECK-HOST-WIN: invoke void @"?f3@@YAXXZ"() + // CHECK-HOST-LIN: call void @_Z2f2v() + // CHECK-HOST-WIN: call void @"?f3@@YAXXZ"() f2(); } @@ -38,14 +38,13 @@ struct A { void foo_cleanup() { A a; // CHECK-DEVICE: call spir_func void @_Z2f3v() - // CHECK-HOST: invoke void @_Z2f3v() + // CHECK-HOST: call void @_Z2f3v() f3(); // CHECK-DEVICE: call spir_func void @_ZN1AD1Ev // Regular + exception cleanup // CHECK-HOST-LIN: call void @_ZN1AD1Ev - // CHECK-HOST-LIN: call void @_ZN1AD1Ev + // 
CHECK-HOST-LIN: call void @_ZN1AD2Ev // CHECK-HOST-WIN: call void @"??1A@@QEAA@XZ"(%struct.A* %a) - // CHECK-HOST-WIN: call void @"??1A@@QEAA@XZ"(%struct.A* %a) #4 [ "funclet"(token %0) ] } template